From 9e28edf96959479a9d343a8fadfd40cfa08735f8 Mon Sep 17 00:00:00 2001
From: Julien Pinsonneau <91894519+jpinsonneau@users.noreply.github.com>
Date: Fri, 20 Dec 2024 12:28:26 +0100
Subject: [PATCH] bump flp dependency (#495)

---
 go.mod                                        |    44 +-
 go.sum                                        |    94 +-
 .../go-ini/ini}/.editorconfig                 |     0
 .../go-ini/ini}/.gitignore                    |     0
 .../go-ini/ini}/.golangci.yml                 |     0
 .../ini.v1 => github.com/go-ini/ini}/LICENSE  |     0
 .../ini.v1 => github.com/go-ini/ini}/Makefile |     0
 .../go-ini/ini}/README.md                     |     0
 .../go-ini/ini}/codecov.yml                   |     0
 .../go-ini/ini}/data_source.go                |     0
 .../go-ini/ini}/deprecated.go                 |     0
 .../ini.v1 => github.com/go-ini/ini}/error.go |     0
 .../ini.v1 => github.com/go-ini/ini}/file.go  |     0
 .../go-ini/ini}/helper.go                     |     0
 .../ini.v1 => github.com/go-ini/ini}/ini.go   |     0
 .../ini.v1 => github.com/go-ini/ini}/key.go   |     0
 .../go-ini/ini}/parser.go                     |     0
 .../go-ini/ini}/section.go                    |     0
 .../go-ini/ini}/struct.go                     |     0
 vendor/github.com/goccy/go-json/.codecov.yml  |    32 +
 vendor/github.com/goccy/go-json/.gitignore    |     2 +
 vendor/github.com/goccy/go-json/.golangci.yml |    86 +
 vendor/github.com/goccy/go-json/CHANGELOG.md  |   425 +
 vendor/github.com/goccy/go-json/LICENSE       |    21 +
 vendor/github.com/goccy/go-json/Makefile      |    39 +
 vendor/github.com/goccy/go-json/README.md     |   529 +
 vendor/github.com/goccy/go-json/color.go      |    68 +
 vendor/github.com/goccy/go-json/decode.go     |   263 +
 .../goccy/go-json/docker-compose.yml          |    13 +
 vendor/github.com/goccy/go-json/encode.go     |   326 +
 vendor/github.com/goccy/go-json/error.go      |    41 +
 .../internal/decoder/anonymous_field.go       |    41 +
 .../goccy/go-json/internal/decoder/array.go   |   176 +
 .../goccy/go-json/internal/decoder/assign.go  |   438 +
 .../goccy/go-json/internal/decoder/bool.go    |    83 +
 .../goccy/go-json/internal/decoder/bytes.go   |   118 +
 .../goccy/go-json/internal/decoder/compile.go |   487 +
 .../internal/decoder/compile_norace.go        |    29 +
 .../go-json/internal/decoder/compile_race.go  |    37 +
 .../goccy/go-json/internal/decoder/context.go |   254 +
 .../goccy/go-json/internal/decoder/float.go   |   170 +
 .../goccy/go-json/internal/decoder/func.go    |   146 +
 .../goccy/go-json/internal/decoder/int.go     |   246 +
 .../go-json/internal/decoder/interface.go     |   528 +
 .../goccy/go-json/internal/decoder/invalid.go |    55 +
 .../goccy/go-json/internal/decoder/map.go     |   280 +
 .../goccy/go-json/internal/decoder/number.go  |   123 +
 .../goccy/go-json/internal/decoder/option.go  |    17 +
 .../goccy/go-json/internal/decoder/path.go    |   670 +
 .../goccy/go-json/internal/decoder/ptr.go     |    97 +
 .../goccy/go-json/internal/decoder/slice.go   |   380 +
 .../goccy/go-json/internal/decoder/stream.go  |   556 +
 .../goccy/go-json/internal/decoder/string.go  |   452 +
 .../goccy/go-json/internal/decoder/struct.go  |   845 +
 .../goccy/go-json/internal/decoder/type.go    |    30 +
 .../goccy/go-json/internal/decoder/uint.go    |   194 +
 .../internal/decoder/unmarshal_json.go        |   104 +
 .../internal/decoder/unmarshal_text.go        |   285 +
 .../internal/decoder/wrapped_string.go        |    73 +
 .../goccy/go-json/internal/encoder/code.go    |  1023 +
 .../goccy/go-json/internal/encoder/compact.go |   286 +
 .../go-json/internal/encoder/compiler.go      |   935 +
 .../internal/encoder/compiler_norace.go       |    32 +
 .../go-json/internal/encoder/compiler_race.go |    45 +
 .../goccy/go-json/internal/encoder/context.go |   105 +
 .../go-json/internal/encoder/decode_rune.go   |   126 +
 .../goccy/go-json/internal/encoder/encoder.go |   596 +
 .../goccy/go-json/internal/encoder/indent.go  |   211 +
 .../goccy/go-json/internal/encoder/int.go     |   176 +
 .../goccy/go-json/internal/encoder/map112.go  |     9 +
 .../goccy/go-json/internal/encoder/map113.go  |     9 +
 .../goccy/go-json/internal/encoder/opcode.go  |   752 +
 .../goccy/go-json/internal/encoder/option.go  |    48 +
 .../goccy/go-json/internal/encoder/optype.go  |   932 +
 .../goccy/go-json/internal/encoder/query.go   |   135 +
 .../goccy/go-json/internal/encoder/string.go  |   483 +
 .../go-json/internal/encoder/string_table.go  |   415 +
 .../go-json/internal/encoder/vm/debug_vm.go   |    41 +
 .../goccy/go-json/internal/encoder/vm/hack.go |     9 +
 .../goccy/go-json/internal/encoder/vm/util.go |   207 +
 .../goccy/go-json/internal/encoder/vm/vm.go   |  4859 ++++
 .../internal/encoder/vm_color/debug_vm.go     |    35 +
 .../go-json/internal/encoder/vm_color/hack.go |     9 +
 .../go-json/internal/encoder/vm_color/util.go |   274 +
 .../go-json/internal/encoder/vm_color/vm.go   |  4859 ++++
 .../encoder/vm_color_indent/debug_vm.go       |    35 +
 .../internal/encoder/vm_color_indent/util.go  |   297 +
 .../internal/encoder/vm_color_indent/vm.go    |  4859 ++++
 .../internal/encoder/vm_indent/debug_vm.go    |    35 +
 .../internal/encoder/vm_indent/hack.go        |     9 +
 .../internal/encoder/vm_indent/util.go        |   230 +
 .../go-json/internal/encoder/vm_indent/vm.go  |  4859 ++++
 .../goccy/go-json/internal/errors/error.go    |   183 +
 .../goccy/go-json/internal/runtime/rtype.go   |   262 +
 .../go-json/internal/runtime/struct_field.go  |    91 +
 .../goccy/go-json/internal/runtime/type.go    |   100 +
 vendor/github.com/goccy/go-json/json.go       |   368 +
 vendor/github.com/goccy/go-json/option.go     |    79 +
 vendor/github.com/goccy/go-json/path.go       |    84 +
 vendor/github.com/goccy/go-json/query.go      |    47 +
 .../grpc-gateway/v2/runtime/convert.go        |     6 +-
 .../grpc-gateway/v2/runtime/errors.go         |    15 +
 .../grpc-gateway/v2/runtime/fieldmask.go      |     2 +-
 .../v2/runtime/marshaler_registry.go          |     4 +-
 .../grpc-gateway/v2/runtime/proto2_convert.go |     4 +-
 .../grpc-gateway/v2/utilities/pattern.go      |     2 +-
 .../v2/utilities/string_array_flag.go         |     2 +-
 .../klauspost/compress/.goreleaser.yml        |     6 +-
 .../github.com/klauspost/compress/README.md   |    29 +-
 .../klauspost/compress/flate/deflate.go       |     2 +-
 .../klauspost/compress/flate/inflate.go       |    74 +-
 .../klauspost/compress/fse/decompress.go      |     2 +-
 .../klauspost/compress/huff0/decompress.go    |     4 +-
 .../klauspost/compress/s2/encode.go           |    25 +-
 .../klauspost/compress/s2/encode_amd64.go     |   201 +-
 .../klauspost/compress/s2/encode_go.go        |     4 +-
 .../compress/s2/encodeblock_amd64.go          |    44 +-
 .../klauspost/compress/s2/encodeblock_amd64.s | 21920 ++++++++--------
 .../klauspost/compress/s2/writer.go           |    31 +-
 .../klauspost/compress/zstd/blockdec.go       |     4 +-
 .../klauspost/compress/zstd/enc_better.go     |    32 +-
 .../klauspost/compress/zstd/enc_dfast.go      |    16 +-
 .../klauspost/compress/zstd/encoder.go        |    45 +-
 .../klauspost/compress/zstd/framedec.go       |     4 +-
 .../klauspost/compress/zstd/seqdec_amd64.go   |     4 +-
 .../klauspost/compress/zstd/seqdec_amd64.s    |     8 +-
 .../klauspost/compress/zstd/zstd.go           |     4 +
 .../github.com/klauspost/cpuid/v2/README.md   |     1 +
 vendor/github.com/klauspost/cpuid/v2/cpuid.go |   429 +-
 .../klauspost/cpuid/v2/detect_x86.go          |     1 +
 .../klauspost/cpuid/v2/featureid_string.go    |   364 +-
 vendor/github.com/minio/minio-go/v7/CREDITS   |   773 +-
 vendor/github.com/minio/minio-go/v7/Makefile  |     4 +
 vendor/github.com/minio/minio-go/v7/README.md |     2 +-
 .../minio/minio-go/v7/api-bucket-cors.go      |   136 +
 .../minio-go/v7/api-bucket-notification.go    |     3 +-
 .../minio/minio-go/v7/api-compose-object.go   |     2 +-
 .../minio/minio-go/v7/api-datatypes.go        |    18 +-
 .../minio/minio-go/v7/api-get-object-file.go  |     2 +-
 .../minio/minio-go/v7/api-get-object.go       |    28 +-
 .../minio/minio-go/v7/api-prompt-object.go    |    78 +
 .../minio/minio-go/v7/api-prompt-options.go   |    84 +
 .../minio-go/v7/api-put-object-fan-out.go     |     5 +-
 .../minio-go/v7/api-put-object-multipart.go   |    54 +-
 .../minio-go/v7/api-put-object-streaming.go   |   169 +-
 .../minio/minio-go/v7/api-put-object.go       |    90 +-
 .../minio-go/v7/api-putobject-snowball.go     |     2 +-
 .../minio/minio-go/v7/api-s3-datatypes.go     |    86 +-
 .../github.com/minio/minio-go/v7/api-stat.go  |    12 +-
 vendor/github.com/minio/minio-go/v7/api.go    |    46 +-
 .../github.com/minio/minio-go/v7/checksum.go  |   223 +-
 vendor/github.com/minio/minio-go/v7/core.go   |     3 +-
 .../minio/minio-go/v7/functional_tests.go     |  3506 +--
 .../minio/minio-go/v7/pkg/cors/cors.go        |    91 +
 .../pkg/credentials/file_aws_credentials.go   |     2 +-
 .../v7/pkg/credentials/file_minio_client.go   |     6 +-
 .../minio-go/v7/pkg/credentials/iam_aws.go    |     6 +-
 .../v7/pkg/credentials/sts_web_identity.go    |    55 +-
 .../minio-go/v7/pkg/encrypt/server-side.go    |     3 +-
 .../minio-go/v7/pkg/lifecycle/lifecycle.go    |    51 +
 .../v7/pkg/notification/notification.go       |     1 +
 .../v7/pkg/replication/replication.go         |     3 +
 .../minio/minio-go/v7/pkg/s3utils/utils.go    |     2 +-
 .../minio/minio-go/v7/pkg/set/stringset.go    |     4 +-
 .../minio/minio-go/v7/pkg/tags/tags.go        |     2 +-
 .../minio/minio-go/v7/post-policy.go          |   124 +-
 .../minio/minio-go/v7/retry-continous.go      |    10 +-
 vendor/github.com/minio/minio-go/v7/retry.go  |    15 +-
 .../minio/minio-go/v7/s3-endpoints.go         |     4 +
 .../github.com/minio/minio-go/v7/s3-error.go  |     1 +
 vendor/github.com/minio/minio-go/v7/utils.go  |   175 +-
 .../github.com/minio/sha256-simd/.gitignore   |     1 -
 vendor/github.com/minio/sha256-simd/LICENSE   |   202 -
 vendor/github.com/minio/sha256-simd/README.md |   137 -
 .../minio/sha256-simd/cpuid_other.go          |    50 -
 vendor/github.com/minio/sha256-simd/sha256.go |   468 -
 .../sha256-simd/sha256blockAvx512_amd64.asm   |   686 -
 .../sha256-simd/sha256blockAvx512_amd64.go    |   501 -
 .../sha256-simd/sha256blockAvx512_amd64.s     |   267 -
 .../minio/sha256-simd/sha256block_amd64.go    |    31 -
 .../minio/sha256-simd/sha256block_amd64.s     |   266 -
 .../minio/sha256-simd/sha256block_arm64.go    |    37 -
 .../minio/sha256-simd/sha256block_arm64.s     |   192 -
 .../minio/sha256-simd/sha256block_other.go    |    29 -
 .../minio/sha256-simd/test-architectures.sh   |    15 -
 .../flowlogs-pipeline/pkg/api/encode_prom.go  |     1 +
 .../flowlogs-pipeline/pkg/api/encode_s3.go    |     2 +-
 .../pkg/api/transform_filter.go               |    22 +
 .../pkg/api/transform_network.go              |     2 +
 .../pkg/pipeline/encode/encode_prom.go        |    17 +-
 .../pkg/pipeline/encode/encode_prom_metric.go |   150 -
 .../pkg/pipeline/encode/metrics/filtering.go  |    28 +
 .../pkg/pipeline/encode/metrics/flattening.go |    88 +
 .../pkg/pipeline/encode/metrics/preprocess.go |    91 +
 .../pkg/pipeline/encode/metrics_common.go     |   211 +-
 .../opentelemetry/encode_otlpmetrics.go       |     3 +-
 .../encode/opentelemetry/opentelemetry.go     |     2 +-
 .../pipeline/extract/aggregate/aggregate.go   |    50 +-
 .../pipeline/extract/aggregate/aggregates.go  |     3 +-
 .../pkg/pipeline/extract/timebased/filters.go |     2 +-
 .../pipeline/extract/timebased/timebased.go   |     6 +-
 .../pkg/pipeline/ingest/ingest_grpc.go        |     7 +-
 .../pipeline/transform/kubernetes/enrich.go   |     4 +-
 .../pipeline/transform/transform_filter.go    |    63 +-
 .../pipeline/transform/transform_generic.go   |    26 +-
 .../pipeline/transform/transform_network.go   |    10 +-
 .../pkg/pipeline/write/write_ipfix.go         |    22 +-
 .../pkg/pipeline/write/write_loki.go          |     6 +-
 .../flowlogs-pipeline/pkg/utils/convert.go    |     4 +-
 .../pkg/utils/filters/filters.go              |   153 +
 .../flowlogs-pipeline/pkg/utils/tcp_flags.go  |    30 +
 vendor/github.com/rs/xid/.gitignore           |     3 +
 vendor/github.com/rs/xid/README.md            |    10 +-
 vendor/github.com/rs/xid/hostid_darwin.go     |    29 +-
 vendor/github.com/rs/xid/hostid_windows.go    |    20 +-
 vendor/github.com/rs/xid/id.go                |    13 +-
 vendor/go.opentelemetry.io/otel/.gitignore    |     8 -
 vendor/go.opentelemetry.io/otel/.golangci.yml |    13 +-
 vendor/go.opentelemetry.io/otel/CHANGELOG.md  |   103 +-
 vendor/go.opentelemetry.io/otel/CODEOWNERS    |     4 +-
 .../go.opentelemetry.io/otel/CONTRIBUTING.md  |    24 +-
 vendor/go.opentelemetry.io/otel/Makefile      |    11 +-
 vendor/go.opentelemetry.io/otel/README.md     |    11 +-
 vendor/go.opentelemetry.io/otel/RELEASING.md  |    11 -
 .../go.opentelemetry.io/otel/attribute/set.go |    40 +-
 .../otel/baggage/baggage.go                   |     6 +-
 .../otlp/otlpmetric/otlpmetricgrpc/client.go  |     7 +-
 .../otlpmetricgrpc/internal/oconf/options.go  |     6 +-
 .../otlpmetricgrpc/internal/oconf/tls.go      |     2 +-
 .../internal/transform/metricdata.go          |    18 +-
 .../otlp/otlpmetric/otlpmetricgrpc/version.go |     2 +-
 .../otel/internal/global/instruments.go       |    14 +-
 .../otel/internal/global/meter.go             |   382 +-
 .../otel/internal/global/trace.go             |     8 +-
 .../otel/internal/rawhelpers.go               |     3 +-
 .../otel/metric/asyncfloat64.go               |     2 +-
 .../otel/metric/asyncint64.go                 |     2 +-
 .../otel/metric/instrument.go                 |     2 +-
 vendor/go.opentelemetry.io/otel/renovate.json |     8 +
 .../otel/sdk/instrumentation/scope.go         |     4 +
 .../otel/sdk/metric/config.go                 |    79 +-
 .../otel/sdk/metric/exemplar.go               |    68 +-
 .../otel/sdk/metric/exemplar/README.md        |     3 +
 .../sdk/metric/{internal => }/exemplar/doc.go |     2 +-
 .../{internal => }/exemplar/exemplar.go       |     2 +-
 .../metric/{internal => }/exemplar/filter.go  |    11 +-
 .../fixed_size_reservoir.go}                  |   118 +-
 .../metric/exemplar/histogram_reservoir.go    |    70 +
 .../{internal => }/exemplar/reservoir.go      |    10 +-
 .../metric/{internal => }/exemplar/storage.go |     8 +-
 .../metric/{internal => }/exemplar/value.go   |     5 +-
 .../otel/sdk/metric/instrument.go             |    14 +-
 .../metric/internal/aggregate/aggregate.go    |     9 +-
 .../sdk/metric/internal/aggregate/drop.go     |    27 +
 .../sdk/metric/internal/aggregate/exemplar.go |     3 +-
 .../aggregate/exponential_histogram.go        |     9 +-
 .../internal/aggregate/filtered_reservoir.go  |    50 +
 .../metric/internal/aggregate/histogram.go    |    11 +-
 .../metric/internal/aggregate/lastvalue.go    |    11 +-
 .../otel/sdk/metric/internal/aggregate/sum.go |    17 +-
 .../otel/sdk/metric/internal/exemplar/drop.go |    23 -
 .../internal/exemplar/filtered_reservoir.go   |    49 -
 .../otel/sdk/metric/internal/exemplar/hist.go |    46 -
 .../otel/sdk/metric/internal/x/x.go           |    46 +-
 .../otel/sdk/metric/manual_reader.go          |     9 +-
 .../otel/sdk/metric/meter.go                  |   121 +-
 .../otel/sdk/metric/periodic_reader.go        |     9 +-
 .../otel/sdk/metric/pipeline.go               |   132 +-
 .../otel/sdk/metric/provider.go               |    10 +-
 .../otel/sdk/metric/version.go                |     2 +-
 .../otel/sdk/metric/view.go                   |    11 +-
 .../otel/sdk/resource/auto.go                 |    62 +-
 .../otel/sdk/resource/builtin.go              |     6 +-
 .../otel/sdk/resource/host_id_windows.go      |     7 +-
 .../otel/sdk/resource/os_windows.go           |     1 -
 .../otel/sdk/trace/batch_span_processor.go    |     7 +-
 .../otel/sdk/trace/evictedqueue.go            |    21 +-
 .../otel/sdk/trace/provider.go                |     9 +-
 .../otel/sdk/trace/span.go                    |   108 +-
 .../go.opentelemetry.io/otel/sdk/version.go   |     2 +-
 .../go.opentelemetry.io/otel/trace/context.go |     2 +-
 vendor/go.opentelemetry.io/otel/trace/doc.go  |     2 +-
 .../otel/verify_examples.sh                   |    74 -
 vendor/go.opentelemetry.io/otel/version.go    |     2 +-
 vendor/go.opentelemetry.io/otel/versions.yaml |    15 +-
 .../golang.org/x/crypto/argon2/blamka_amd64.s |  2972 ++-
 .../x/crypto/blake2b/blake2bAVX2_amd64.s      |  5167 +++-
 .../x/crypto/blake2b/blake2b_amd64.s          |  1681 +-
 vendor/golang.org/x/net/http2/config.go       |   122 +
 vendor/golang.org/x/net/http2/config_go124.go |    61 +
 .../x/net/http2/config_pre_go124.go           |    16 +
 vendor/golang.org/x/net/http2/http2.go        |    53 +-
 vendor/golang.org/x/net/http2/server.go       |   181 +-
 vendor/golang.org/x/net/http2/transport.go    |   143 +-
 vendor/golang.org/x/net/http2/write.go        |    10 +
 .../golang.org/x/net/websocket/websocket.go   |     2 +-
 vendor/golang.org/x/oauth2/token.go           |     7 +
 .../golang.org/x/sys/cpu/asm_darwin_x86_gc.s  |    17 +
 vendor/golang.org/x/sys/cpu/cpu_darwin_x86.go |    61 +
 vendor/golang.org/x/sys/cpu/cpu_gc_x86.go     |     4 +-
 .../x/sys/cpu/{cpu_x86.s => cpu_gc_x86.s}     |     2 +-
 vendor/golang.org/x/sys/cpu/cpu_gccgo_x86.go  |     6 -
 .../golang.org/x/sys/cpu/cpu_linux_arm64.go   |     1 -
 vendor/golang.org/x/sys/cpu/cpu_other_x86.go  |    11 +
 vendor/golang.org/x/sys/cpu/cpu_x86.go        |     6 +-
 .../x/sys/cpu/syscall_darwin_x86_gc.go        |    98 +
 vendor/golang.org/x/sys/unix/ioctl_linux.go   |    96 +
 vendor/golang.org/x/sys/unix/mkerrors.sh      |    12 +
 vendor/golang.org/x/sys/unix/syscall_linux.go |     1 +
 .../x/sys/unix/syscall_zos_s390x.go           |   104 +-
 vendor/golang.org/x/sys/unix/zerrors_linux.go |    31 +
 .../x/sys/unix/zerrors_linux_386.go           |    20 +
 .../x/sys/unix/zerrors_linux_amd64.go         |    20 +
 .../x/sys/unix/zerrors_linux_arm.go           |    20 +
 .../x/sys/unix/zerrors_linux_arm64.go         |    21 +
 .../x/sys/unix/zerrors_linux_loong64.go       |    20 +
 .../x/sys/unix/zerrors_linux_mips.go          |    20 +
 .../x/sys/unix/zerrors_linux_mips64.go        |    20 +
 .../x/sys/unix/zerrors_linux_mips64le.go      |    20 +
 .../x/sys/unix/zerrors_linux_mipsle.go        |    20 +
 .../x/sys/unix/zerrors_linux_ppc.go           |    20 +
 .../x/sys/unix/zerrors_linux_ppc64.go         |    20 +
 .../x/sys/unix/zerrors_linux_ppc64le.go       |    20 +
 .../x/sys/unix/zerrors_linux_riscv64.go       |    20 +
 .../x/sys/unix/zerrors_linux_s390x.go         |    20 +
 .../x/sys/unix/zerrors_linux_sparc64.go       |    20 +
 .../golang.org/x/sys/unix/zsyscall_linux.go   |    10 +
 .../x/sys/unix/ztypes_darwin_amd64.go         |    60 +
 .../x/sys/unix/ztypes_darwin_arm64.go         |    60 +
 vendor/golang.org/x/sys/unix/ztypes_linux.go  |   138 +-
 .../golang.org/x/sys/unix/ztypes_zos_s390x.go |     6 +
 .../x/sys/windows/syscall_windows.go          |    36 +-
 .../golang.org/x/sys/windows/types_windows.go |   127 +
 .../x/sys/windows/zsyscall_windows.go         |    71 +
 vendor/golang.org/x/term/README.md            |    11 +-
 vendor/golang.org/x/term/term_windows.go      |     1 +
 vendor/modules.txt                            |    83 +-
 337 files changed, 64399 insertions(+), 19540 deletions(-)
 rename vendor/{gopkg.in/ini.v1 => github.com/go-ini/ini}/.editorconfig (100%)
 rename vendor/{gopkg.in/ini.v1 => github.com/go-ini/ini}/.gitignore (100%)
 rename vendor/{gopkg.in/ini.v1 => github.com/go-ini/ini}/.golangci.yml (100%)
 rename vendor/{gopkg.in/ini.v1 => github.com/go-ini/ini}/LICENSE (100%)
 rename vendor/{gopkg.in/ini.v1 => github.com/go-ini/ini}/Makefile (100%)
 rename vendor/{gopkg.in/ini.v1 => github.com/go-ini/ini}/README.md (100%)
 rename vendor/{gopkg.in/ini.v1 => github.com/go-ini/ini}/codecov.yml (100%)
 rename vendor/{gopkg.in/ini.v1 => github.com/go-ini/ini}/data_source.go (100%)
 rename vendor/{gopkg.in/ini.v1 => github.com/go-ini/ini}/deprecated.go (100%)
 rename vendor/{gopkg.in/ini.v1 => github.com/go-ini/ini}/error.go (100%)
 rename vendor/{gopkg.in/ini.v1 => github.com/go-ini/ini}/file.go (100%)
 rename vendor/{gopkg.in/ini.v1 => github.com/go-ini/ini}/helper.go (100%)
 rename vendor/{gopkg.in/ini.v1 => github.com/go-ini/ini}/ini.go (100%)
 rename vendor/{gopkg.in/ini.v1 => github.com/go-ini/ini}/key.go (100%)
 rename vendor/{gopkg.in/ini.v1 => github.com/go-ini/ini}/parser.go (100%)
 rename vendor/{gopkg.in/ini.v1 => github.com/go-ini/ini}/section.go (100%)
 rename vendor/{gopkg.in/ini.v1 => github.com/go-ini/ini}/struct.go (100%)
 create mode 100644 vendor/github.com/goccy/go-json/.codecov.yml
 create mode 100644 vendor/github.com/goccy/go-json/.gitignore
 create mode 100644 vendor/github.com/goccy/go-json/.golangci.yml
 create mode 100644 vendor/github.com/goccy/go-json/CHANGELOG.md
 create mode 100644 vendor/github.com/goccy/go-json/LICENSE
 create mode 100644 vendor/github.com/goccy/go-json/Makefile
 create mode 100644 vendor/github.com/goccy/go-json/README.md
 create mode 100644 vendor/github.com/goccy/go-json/color.go
 create mode 100644 vendor/github.com/goccy/go-json/decode.go
 create mode 100644 vendor/github.com/goccy/go-json/docker-compose.yml
 create mode 100644 vendor/github.com/goccy/go-json/encode.go
 create mode 100644 vendor/github.com/goccy/go-json/error.go
 create mode 100644 vendor/github.com/goccy/go-json/internal/decoder/anonymous_field.go
 create mode 100644 vendor/github.com/goccy/go-json/internal/decoder/array.go
 create mode 100644 vendor/github.com/goccy/go-json/internal/decoder/assign.go
 create mode 100644 vendor/github.com/goccy/go-json/internal/decoder/bool.go
 create mode 100644 vendor/github.com/goccy/go-json/internal/decoder/bytes.go
 create mode 100644 vendor/github.com/goccy/go-json/internal/decoder/compile.go
 create mode 100644 vendor/github.com/goccy/go-json/internal/decoder/compile_norace.go
 create mode 100644 vendor/github.com/goccy/go-json/internal/decoder/compile_race.go
 create mode 100644 vendor/github.com/goccy/go-json/internal/decoder/context.go
 create mode 100644 vendor/github.com/goccy/go-json/internal/decoder/float.go
 create mode 100644 vendor/github.com/goccy/go-json/internal/decoder/func.go
 create mode 100644 vendor/github.com/goccy/go-json/internal/decoder/int.go
 create mode 100644 vendor/github.com/goccy/go-json/internal/decoder/interface.go
 create mode 100644 vendor/github.com/goccy/go-json/internal/decoder/invalid.go
 create mode 100644 vendor/github.com/goccy/go-json/internal/decoder/map.go
 create mode 100644 vendor/github.com/goccy/go-json/internal/decoder/number.go
 create mode 100644 vendor/github.com/goccy/go-json/internal/decoder/option.go
 create mode 100644 vendor/github.com/goccy/go-json/internal/decoder/path.go
 create mode 100644 vendor/github.com/goccy/go-json/internal/decoder/ptr.go
 create mode 100644 vendor/github.com/goccy/go-json/internal/decoder/slice.go
 create mode 100644 vendor/github.com/goccy/go-json/internal/decoder/stream.go
 create mode 100644 vendor/github.com/goccy/go-json/internal/decoder/string.go
 create mode 100644 vendor/github.com/goccy/go-json/internal/decoder/struct.go
 create mode 100644 vendor/github.com/goccy/go-json/internal/decoder/type.go
 create mode 100644 vendor/github.com/goccy/go-json/internal/decoder/uint.go
 create mode 100644 vendor/github.com/goccy/go-json/internal/decoder/unmarshal_json.go
 create mode 100644 vendor/github.com/goccy/go-json/internal/decoder/unmarshal_text.go
 create mode 100644 vendor/github.com/goccy/go-json/internal/decoder/wrapped_string.go
 create mode 100644 vendor/github.com/goccy/go-json/internal/encoder/code.go
 create mode 100644 vendor/github.com/goccy/go-json/internal/encoder/compact.go
 create mode 100644 vendor/github.com/goccy/go-json/internal/encoder/compiler.go
 create mode 100644 vendor/github.com/goccy/go-json/internal/encoder/compiler_norace.go
 create mode 100644 vendor/github.com/goccy/go-json/internal/encoder/compiler_race.go
 create mode 100644 vendor/github.com/goccy/go-json/internal/encoder/context.go
 create mode 100644 vendor/github.com/goccy/go-json/internal/encoder/decode_rune.go
 create mode 100644 vendor/github.com/goccy/go-json/internal/encoder/encoder.go
 create mode 100644 vendor/github.com/goccy/go-json/internal/encoder/indent.go
 create mode 100644 vendor/github.com/goccy/go-json/internal/encoder/int.go
 create mode 100644 vendor/github.com/goccy/go-json/internal/encoder/map112.go
 create mode 100644 vendor/github.com/goccy/go-json/internal/encoder/map113.go
 create mode 100644 vendor/github.com/goccy/go-json/internal/encoder/opcode.go
 create mode 100644 vendor/github.com/goccy/go-json/internal/encoder/option.go
 create mode 100644 vendor/github.com/goccy/go-json/internal/encoder/optype.go
 create mode 100644 vendor/github.com/goccy/go-json/internal/encoder/query.go
 create mode 100644 vendor/github.com/goccy/go-json/internal/encoder/string.go
 create mode 100644 vendor/github.com/goccy/go-json/internal/encoder/string_table.go
 create mode 100644 vendor/github.com/goccy/go-json/internal/encoder/vm/debug_vm.go
 create mode 100644 vendor/github.com/goccy/go-json/internal/encoder/vm/hack.go
 create mode 100644 vendor/github.com/goccy/go-json/internal/encoder/vm/util.go
 create mode 100644 vendor/github.com/goccy/go-json/internal/encoder/vm/vm.go
 create mode 100644 vendor/github.com/goccy/go-json/internal/encoder/vm_color/debug_vm.go
 create mode 100644 vendor/github.com/goccy/go-json/internal/encoder/vm_color/hack.go
 create mode 100644 vendor/github.com/goccy/go-json/internal/encoder/vm_color/util.go
 create mode 100644 vendor/github.com/goccy/go-json/internal/encoder/vm_color/vm.go
 create mode 100644 vendor/github.com/goccy/go-json/internal/encoder/vm_color_indent/debug_vm.go
 create mode 100644 vendor/github.com/goccy/go-json/internal/encoder/vm_color_indent/util.go
 create mode 100644 vendor/github.com/goccy/go-json/internal/encoder/vm_color_indent/vm.go
 create mode 100644 vendor/github.com/goccy/go-json/internal/encoder/vm_indent/debug_vm.go
 create mode 100644 vendor/github.com/goccy/go-json/internal/encoder/vm_indent/hack.go
 create mode 100644 vendor/github.com/goccy/go-json/internal/encoder/vm_indent/util.go
 create mode 100644 vendor/github.com/goccy/go-json/internal/encoder/vm_indent/vm.go
 create mode 100644 vendor/github.com/goccy/go-json/internal/errors/error.go
 create mode 100644 vendor/github.com/goccy/go-json/internal/runtime/rtype.go
 create mode 100644 vendor/github.com/goccy/go-json/internal/runtime/struct_field.go
 create mode 100644 vendor/github.com/goccy/go-json/internal/runtime/type.go
 create mode 100644 vendor/github.com/goccy/go-json/json.go
 create mode 100644 vendor/github.com/goccy/go-json/option.go
 create mode 100644 vendor/github.com/goccy/go-json/path.go
 create mode 100644 vendor/github.com/goccy/go-json/query.go
 create mode 100644 vendor/github.com/minio/minio-go/v7/api-bucket-cors.go
 create mode 100644 vendor/github.com/minio/minio-go/v7/api-prompt-object.go
 create mode 100644 vendor/github.com/minio/minio-go/v7/api-prompt-options.go
 create mode 100644 vendor/github.com/minio/minio-go/v7/pkg/cors/cors.go
 delete mode 100644 vendor/github.com/minio/sha256-simd/.gitignore
 delete mode 100644 vendor/github.com/minio/sha256-simd/LICENSE
 delete mode 100644 vendor/github.com/minio/sha256-simd/README.md
 delete mode 100644 vendor/github.com/minio/sha256-simd/cpuid_other.go
 delete mode 100644 vendor/github.com/minio/sha256-simd/sha256.go
 delete mode 100644 vendor/github.com/minio/sha256-simd/sha256blockAvx512_amd64.asm
 delete mode 100644 vendor/github.com/minio/sha256-simd/sha256blockAvx512_amd64.go
 delete mode 100644 vendor/github.com/minio/sha256-simd/sha256blockAvx512_amd64.s
 delete mode 100644 vendor/github.com/minio/sha256-simd/sha256block_amd64.go
 delete mode 100644 vendor/github.com/minio/sha256-simd/sha256block_amd64.s
 delete mode 100644 vendor/github.com/minio/sha256-simd/sha256block_arm64.go
 delete mode 100644 vendor/github.com/minio/sha256-simd/sha256block_arm64.s
 delete mode 100644 vendor/github.com/minio/sha256-simd/sha256block_other.go
 delete mode 100644 vendor/github.com/minio/sha256-simd/test-architectures.sh
 delete mode 100644 vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/encode/encode_prom_metric.go
 create mode 100644 vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/encode/metrics/filtering.go
 create mode 100644 vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/encode/metrics/flattening.go
 create mode 100644 vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/encode/metrics/preprocess.go
 create mode 100644 vendor/github.com/netobserv/flowlogs-pipeline/pkg/utils/filters/filters.go
 create mode 100644 vendor/github.com/netobserv/flowlogs-pipeline/pkg/utils/tcp_flags.go
 create mode 100644 vendor/github.com/rs/xid/.gitignore
 create mode 100644 vendor/go.opentelemetry.io/otel/sdk/metric/exemplar/README.md
 rename vendor/go.opentelemetry.io/otel/sdk/metric/{internal => }/exemplar/doc.go (93%)
 rename vendor/go.opentelemetry.io/otel/sdk/metric/{internal => }/exemplar/exemplar.go (98%)
 rename vendor/go.opentelemetry.io/otel/sdk/metric/{internal => }/exemplar/filter.go (75%)
 rename vendor/go.opentelemetry.io/otel/sdk/metric/{internal/exemplar/rand.go => exemplar/fixed_size_reservoir.go} (73%)
 create mode 100644 vendor/go.opentelemetry.io/otel/sdk/metric/exemplar/histogram_reservoir.go
 rename vendor/go.opentelemetry.io/otel/sdk/metric/{internal => }/exemplar/reservoir.go (73%)
 rename vendor/go.opentelemetry.io/otel/sdk/metric/{internal => }/exemplar/storage.go (94%)
 rename vendor/go.opentelemetry.io/otel/sdk/metric/{internal => }/exemplar/value.go (91%)
 create mode 100644 vendor/go.opentelemetry.io/otel/sdk/metric/internal/aggregate/drop.go
 create mode 100644 vendor/go.opentelemetry.io/otel/sdk/metric/internal/aggregate/filtered_reservoir.go
 delete mode 100644 vendor/go.opentelemetry.io/otel/sdk/metric/internal/exemplar/drop.go
 delete mode 100644 vendor/go.opentelemetry.io/otel/sdk/metric/internal/exemplar/filtered_reservoir.go
 delete mode 100644 vendor/go.opentelemetry.io/otel/sdk/metric/internal/exemplar/hist.go
 delete mode 100644 vendor/go.opentelemetry.io/otel/verify_examples.sh
 create mode 100644 vendor/golang.org/x/net/http2/config.go
 create mode 100644 vendor/golang.org/x/net/http2/config_go124.go
 create mode 100644 vendor/golang.org/x/net/http2/config_pre_go124.go
 create mode 100644 vendor/golang.org/x/sys/cpu/asm_darwin_x86_gc.s
 create mode 100644 vendor/golang.org/x/sys/cpu/cpu_darwin_x86.go
 rename vendor/golang.org/x/sys/cpu/{cpu_x86.s => cpu_gc_x86.s} (94%)
 create mode 100644 vendor/golang.org/x/sys/cpu/cpu_other_x86.go
 create mode 100644 vendor/golang.org/x/sys/cpu/syscall_darwin_x86_gc.go

diff --git a/go.mod b/go.mod
index 6a83f5a0..3e5ec180 100644
--- a/go.mod
+++ b/go.mod
@@ -12,7 +12,7 @@ require (
 	github.com/gopacket/gopacket v1.2.0
 	github.com/mariomac/guara v0.0.0-20220523124851-5fc279816f1f
 	github.com/mdlayher/ethernet v0.0.0-20220221185849-529eae5b6118
-	github.com/netobserv/flowlogs-pipeline v1.6.1-crc0.0.20240930010330-bc3d4ebb4a88
+	github.com/netobserv/flowlogs-pipeline v1.7.0-community.0.20241217113023-fa0540a1658e
 	github.com/netobserv/gopipes v0.3.0
 	github.com/ovn-org/ovn-kubernetes/go-controller v0.0.0-20241126140656-c95491e46334
 	github.com/paulbellamy/ratecounter v0.2.0
@@ -24,7 +24,7 @@ require (
 	github.com/vishvananda/netns v0.0.4
 	github.com/vladimirvivien/gexe v0.3.0
 	github.com/vmware/go-ipfix v0.9.0
-	golang.org/x/sys v0.26.0
+	golang.org/x/sys v0.28.0
 	google.golang.org/grpc v1.67.1
 	google.golang.org/protobuf v1.35.1
 	gopkg.in/yaml.v2 v2.4.0
@@ -53,6 +53,7 @@ require (
 	github.com/emicklei/go-restful/v3 v3.11.0 // indirect
 	github.com/evanphx/json-patch/v5 v5.9.0 // indirect
 	github.com/fxamacker/cbor/v2 v2.7.0 // indirect
+	github.com/go-ini/ini v1.67.0 // indirect
 	github.com/go-kit/kit v0.13.0 // indirect
 	github.com/go-kit/log v0.2.1 // indirect
 	github.com/go-logfmt/logfmt v0.5.1 // indirect
@@ -61,6 +62,7 @@ require (
 	github.com/go-openapi/jsonpointer v0.19.6 // indirect
 	github.com/go-openapi/jsonreference v0.20.2 // indirect
 	github.com/go-openapi/swag v0.22.4 // indirect
+	github.com/goccy/go-json v0.10.3 // indirect
 	github.com/gogo/protobuf v1.3.2 // indirect
 	github.com/golang/protobuf v1.5.4 // indirect
 	github.com/golang/snappy v0.0.4 // indirect
@@ -69,20 +71,19 @@ require (
 	github.com/google/gofuzz v1.2.0 // indirect
 	github.com/google/uuid v1.6.0 // indirect
 	github.com/gorilla/websocket v1.5.0 // indirect
-	github.com/grpc-ecosystem/grpc-gateway/v2 v2.22.0 // indirect
+	github.com/grpc-ecosystem/grpc-gateway/v2 v2.23.0 // indirect
 	github.com/heptiolabs/healthcheck v0.0.0-20211123025425-613501dd5deb // indirect
 	github.com/imdario/mergo v0.3.15 // indirect
 	github.com/ip2location/ip2location-go/v9 v9.7.0 // indirect
 	github.com/josharian/intern v1.0.0 // indirect
 	github.com/jpillora/backoff v1.0.0 // indirect
 	github.com/json-iterator/go v1.1.12 // indirect
-	github.com/klauspost/compress v1.17.9 // indirect
-	github.com/klauspost/cpuid/v2 v2.2.6 // indirect
+	github.com/klauspost/compress v1.17.11 // indirect
+	github.com/klauspost/cpuid/v2 v2.2.8 // indirect
 	github.com/libp2p/go-reuseport v0.3.0 // indirect
 	github.com/mailru/easyjson v0.7.7 // indirect
 	github.com/minio/md5-simd v1.1.2 // indirect
-	github.com/minio/minio-go/v7 v7.0.69 // indirect
-	github.com/minio/sha256-simd v1.0.1 // indirect
+	github.com/minio/minio-go/v7 v7.0.82 // indirect
 	github.com/mitchellh/mapstructure v1.5.0 // indirect
 	github.com/moby/spdystream v0.4.0 // indirect
 	github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
@@ -104,7 +105,7 @@ require (
 	github.com/prometheus/common v0.55.0 // indirect
 	github.com/prometheus/procfs v0.15.1 // indirect
 	github.com/prometheus/prometheus v1.8.2-0.20201028100903-3245b3267b24 // indirect
-	github.com/rs/xid v1.5.0 // indirect
+	github.com/rs/xid v1.6.0 // indirect
 	github.com/russross/blackfriday/v2 v2.1.0 // indirect
 	github.com/safchain/ethtool v0.3.1-0.20231027162144-83e5e0097c91 // indirect
 	github.com/spf13/pflag v1.0.5 // indirect
@@ -115,30 +116,29 @@ require (
 	github.com/xdg-go/scram v1.1.2 // indirect
 	github.com/xdg-go/stringprep v1.0.4 // indirect
 	github.com/xrash/smetrics v0.0.0-20240312152122-5f08fbb34913 // indirect
-	go.opentelemetry.io/otel v1.29.0 // indirect
-	go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.29.0 // indirect
+	go.opentelemetry.io/otel v1.32.0 // indirect
+	go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.32.0 // indirect
 	go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.29.0 // indirect
 	go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.28.0 // indirect
 	go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.27.0 // indirect
 	go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.26.0 // indirect
-	go.opentelemetry.io/otel/metric v1.29.0 // indirect
-	go.opentelemetry.io/otel/sdk v1.29.0 // indirect
-	go.opentelemetry.io/otel/sdk/metric v1.29.0 // indirect
-	go.opentelemetry.io/otel/trace v1.29.0 // indirect
+	go.opentelemetry.io/otel/metric v1.32.0 // indirect
+	go.opentelemetry.io/otel/sdk v1.32.0 // indirect
+	go.opentelemetry.io/otel/sdk/metric v1.32.0 // indirect
+	go.opentelemetry.io/otel/trace v1.32.0 // indirect
 	go.opentelemetry.io/proto/otlp v1.3.1 // indirect
 	go.uber.org/atomic v1.9.0 // indirect
-	golang.org/x/crypto v0.26.0 // indirect
+	golang.org/x/crypto v0.31.0 // indirect
 	golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842 // indirect
-	golang.org/x/net v0.28.0 // indirect
-	golang.org/x/oauth2 v0.22.0 // indirect
-	golang.org/x/term v0.23.0 // indirect
-	golang.org/x/text v0.17.0 // indirect
+	golang.org/x/net v0.30.0 // indirect
+	golang.org/x/oauth2 v0.23.0 // indirect
+	golang.org/x/term v0.27.0 // indirect
+	golang.org/x/text v0.21.0 // indirect
 	golang.org/x/time v0.5.0 // indirect
-	google.golang.org/genproto/googleapis/api v0.0.0-20240822170219-fc7c04adadcd // indirect
-	google.golang.org/genproto/googleapis/rpc v0.0.0-20240822170219-fc7c04adadcd // indirect
+	google.golang.org/genproto/googleapis/api v0.0.0-20241104194629-dd2ea8efbc28 // indirect
+	google.golang.org/genproto/googleapis/rpc v0.0.0-20241104194629-dd2ea8efbc28 // indirect
 	gopkg.in/gcfg.v1 v1.2.3 // indirect
 	gopkg.in/inf.v0 v0.9.1 // indirect
-	gopkg.in/ini.v1 v1.67.0 // indirect
 	gopkg.in/natefinch/lumberjack.v2 v2.2.1 // indirect
 	gopkg.in/warnings.v0 v0.1.2 // indirect
 	gopkg.in/yaml.v3 v3.0.1 // indirect
diff --git a/go.sum b/go.sum
index 31e2d023..d83f18d9 100644
--- a/go.sum
+++ b/go.sum
@@ -227,6 +227,8 @@ github.com/glycerine/goconvey v0.0.0-20190410193231-58a59202ab31/go.mod h1:Ogl1T
 github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU=
 github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8=
 github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8=
+github.com/go-ini/ini v1.67.0 h1:z6ZrTEZqSWOTyH2FlglNbNgARyHG8oLW9gMELqKr06A=
+github.com/go-ini/ini v1.67.0/go.mod h1:ByCAeIL28uOIIG0E3PJtZPDL8WnHpFKFOtgjp+3Ies8=
 github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=
 github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=
 github.com/go-kit/kit v0.10.0/go.mod h1:xUsJbQ/Fp4kEt7AFgCuvyX4a71u8h9jB8tj/ORgOZ7o=
@@ -347,6 +349,8 @@ github.com/gobuffalo/packd v0.1.0/go.mod h1:M2Juc+hhDXf/PnmBANFCqx4DM3wRbgDvnVWe
 github.com/gobuffalo/packr/v2 v2.0.9/go.mod h1:emmyGweYTm6Kdper+iywB6YK5YzuKchGtJQZ0Odn4pQ=
 github.com/gobuffalo/packr/v2 v2.2.0/go.mod h1:CaAwI0GPIAv+5wKLtv8Afwl+Cm78K/I/VCm/3ptBN+0=
 github.com/gobuffalo/syncx v0.0.0-20190224160051-33c29581e754/go.mod h1:HhnNqWY95UYwwW3uSASeV7vtgYkT2t16hJgV3AEPUpw=
+github.com/goccy/go-json v0.10.3 h1:KZ5WoDbxAIgm2HNbYckL0se1fHD6rz5j4ywS6ebzDqA=
+github.com/goccy/go-json v0.10.3/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M=
 github.com/gofrs/uuid v3.3.0+incompatible/go.mod h1:b2aQJv3Z4Fp6yNu3cdSllBxTCLRxnplIgP/c0N/04lM=
 github.com/gogo/googleapis v1.1.0/go.mod h1:gf4bu3Q80BeJ6H1S1vYPm8/ELATdvryBaNFGgqEef3s=
 github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=
@@ -455,8 +459,8 @@ github.com/grpc-ecosystem/go-grpc-middleware v1.0.1-0.20190118093823-f849b5445de
 github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk=
 github.com/grpc-ecosystem/grpc-gateway v1.9.5/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY=
 github.com/grpc-ecosystem/grpc-gateway v1.15.0/go.mod h1:vO11I9oWA+KsxmfFQPhLnnIb1VDE24M+pdxZFiuZcA8=
-github.com/grpc-ecosystem/grpc-gateway/v2 v2.22.0 h1:asbCHRVmodnJTuQ3qamDwqVOIjwqUPTYmYuemVOx+Ys=
-github.com/grpc-ecosystem/grpc-gateway/v2 v2.22.0/go.mod h1:ggCgvZ2r7uOoQjOyu2Y1NhHmEPPzzuhWgcza5M1Ji1I=
+github.com/grpc-ecosystem/grpc-gateway/v2 v2.23.0 h1:ad0vkEBuk23VJzZR9nkLVG0YAoN9coASF1GusYX6AlU=
+github.com/grpc-ecosystem/grpc-gateway/v2 v2.23.0/go.mod h1:igFoXX2ELCW06bol23DWPB5BEWfZISOzSP5K2sbLea0=
 github.com/hashicorp/consul/api v1.3.0/go.mod h1:MmDNSzIMUjNpY/mQ398R4bk2FnqQLoPndWW5VkKPlCE=
 github.com/hashicorp/consul/api v1.7.0/go.mod h1:1NSuaUUkFaJzMasbfq/11wKYWSR67Xn6r2DXKhuDNFg=
 github.com/hashicorp/consul/sdk v0.3.0/go.mod h1:VKf9jXwCTEY1QZP2MOLRhb5i/I/ssyNV1vwHyQBF0x8=
@@ -565,12 +569,12 @@ github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+o
 github.com/klauspost/compress v1.4.0/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
 github.com/klauspost/compress v1.9.5/go.mod h1:RyIbtBH6LamlWaDj8nUwkbUhJ87Yi3uG0guNDohfE1A=
 github.com/klauspost/compress v1.15.9/go.mod h1:PhcZ0MbTNciWF3rruxRgKxI5NkcHHrHUDtV4Yw2GlzU=
-github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA=
-github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw=
+github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc=
+github.com/klauspost/compress v1.17.11/go.mod h1:pMDklpSncoRMuLFrf1W9Ss9KT+0rH90U12bZKk7uwG0=
 github.com/klauspost/cpuid v0.0.0-20170728055534-ae7887de9fa5/go.mod h1:Pj4uuM528wm8OyEC2QMXAi2YiTZ96dNQPGgoMS4s3ek=
 github.com/klauspost/cpuid/v2 v2.0.1/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
-github.com/klauspost/cpuid/v2 v2.2.6 h1:ndNyv040zDGIDh8thGkXYjnFtiN02M1PVVF+JE/48xc=
-github.com/klauspost/cpuid/v2 v2.2.6/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws=
+github.com/klauspost/cpuid/v2 v2.2.8 h1:+StwCXwm9PdpiEkPyzBXIy+M9KUb4ODm0Zarf1kS5BM=
+github.com/klauspost/cpuid/v2 v2.2.8/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws=
 github.com/klauspost/crc32 v0.0.0-20161016154125-cb6bfca970f6/go.mod h1:+ZoRqAPRLkC4NPOvfYeR5KNOrY6TD+/sAC3HXPZgDYg=
 github.com/klauspost/pgzip v1.0.2-0.20170402124221-0bf5dcad4ada/go.mod h1:Ch1tH69qFZu15pkjo5kYi6mth2Zzwzt50oCQKQE9RUs=
 github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ=
@@ -644,10 +648,8 @@ github.com/miekg/dns v1.1.43 h1:JKfpVSCB84vrAmHzyrsxB5NAr5kLoMXZArPSw7Qlgyg=
 github.com/miekg/dns v1.1.43/go.mod h1:+evo5L0630/F6ca/Z9+GAqzhjGyn8/c+TBaOyfEl0V4=
 github.com/minio/md5-simd v1.1.2 h1:Gdi1DZK69+ZVMoNHRXJyNcxrMA4dSxoYHZSQbirFg34=
 github.com/minio/md5-simd v1.1.2/go.mod h1:MzdKDxYpY2BT9XQFocsiZf/NKVtR7nkE4RoEpN+20RM=
-github.com/minio/minio-go/v7 v7.0.69 h1:l8AnsQFyY1xiwa/DaQskY4NXSLA2yrGsW5iD9nRPVS0=
-github.com/minio/minio-go/v7 v7.0.69/go.mod h1:XAvOPJQ5Xlzk5o3o/ArO2NMbhSGkimC+bpW/ngRKDmQ=
-github.com/minio/sha256-simd v1.0.1 h1:6kaan5IFmwTNynnKKpDHe6FWHohJOHhCPchzK49dzMM=
-github.com/minio/sha256-simd v1.0.1/go.mod h1:Pz6AKMiUdngCLpeTL/RJY1M9rUuPMYujV5xJjtbRSN8=
+github.com/minio/minio-go/v7 v7.0.82 h1:tWfICLhmp2aFPXL8Tli0XDTHj2VB/fNf0PC1f/i1gRo=
+github.com/minio/minio-go/v7 v7.0.82/go.mod h1:84gmIilaX4zcvAWWzJ5Z1WI5axN+hAbM5w25xf8xvC0=
 github.com/mitchellh/cli v1.0.0/go.mod h1:hNIlj7HEI86fIcpObd7a0FcrxTWetlwJDGcceTlRvqc=
 github.com/mitchellh/cli v1.1.0/go.mod h1:xcISNoH86gajksDmfB23e/pu+B+GeFRMYmoHXxx3xhI=
 github.com/mitchellh/copystructure v1.2.0 h1:vpKXTN4ewci03Vljg/q9QvCGUDttBOGBIa15WveJJGw=
@@ -692,8 +694,8 @@ github.com/nats-io/nats.go v1.9.1/go.mod h1:ZjDU1L/7fJ09jvUSRVBR2e7+RnLiiIQyqyzE
 github.com/nats-io/nkeys v0.1.0/go.mod h1:xpnFELMwJABBLVhffcfd1MZx6VsNRFpEugbxziKVo7w=
 github.com/nats-io/nkeys v0.1.3/go.mod h1:xpnFELMwJABBLVhffcfd1MZx6VsNRFpEugbxziKVo7w=
 github.com/nats-io/nuid v1.0.1/go.mod h1:19wcPz3Ph3q0Jbyiqsd0kePYG7A95tJPxeL+1OSON2c=
-github.com/netobserv/flowlogs-pipeline v1.6.1-crc0.0.20240930010330-bc3d4ebb4a88 h1:gY72UBc81yWeat5zxkw6pRS30tE3pnFad5zKzCdm3TM=
-github.com/netobserv/flowlogs-pipeline v1.6.1-crc0.0.20240930010330-bc3d4ebb4a88/go.mod h1:VVHQK3sEL91gl6CQkrcE+RAi9vMat5gaVaZLqPDoKlQ=
+github.com/netobserv/flowlogs-pipeline v1.7.0-community.0.20241217113023-fa0540a1658e h1:MdCBEv8sbkhWwoKmp99oeg7eS0tRlD1FTZKRg7r5xWA=
+github.com/netobserv/flowlogs-pipeline v1.7.0-community.0.20241217113023-fa0540a1658e/go.mod h1:P8Gf2JTrvyHpTVZs/N2M1M9nMk1Uu/uvwhVe11c3b9Y=
 github.com/netobserv/gopipes v0.3.0 h1:IYmPnnAVCdSK7VmHmpFhrVBOEm45qpgbZmJz1sSW+60=
 github.com/netobserv/gopipes v0.3.0/go.mod h1:N7/Gz05EOF0CQQSKWsv3eof22Cj2PB08Pbttw98YFYU=
 github.com/netobserv/loki-client-go v0.0.0-20220927092034-f37122a54500 h1:RmnoJe/ci5q+QdM7upFdxiU+D8F3L3qTd5wXCwwHefw=
@@ -837,11 +839,11 @@ github.com/rogpeppe/fastuuid v1.2.0/go.mod h1:jVj6XXZzXRy/MSR5jhDC/2q6DgLz+nrA6L
 github.com/rogpeppe/go-internal v1.1.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
 github.com/rogpeppe/go-internal v1.2.2/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
 github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
-github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8=
-github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4=
+github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII=
+github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o=
 github.com/rs/cors v1.7.0/go.mod h1:gFx+x8UowdsKA9AchylcLynDq+nNFfI8FkUZdN/jGCU=
-github.com/rs/xid v1.5.0 h1:mKX4bl4iPYJtEIxp6CYiUuLQ/8DYMoz0PUdtGgMFRVc=
-github.com/rs/xid v1.5.0/go.mod h1:trrq9SKmegXys3aeAKXMUTdJsYXVwGY3RLcfgqegfbg=
+github.com/rs/xid v1.6.0 h1:fV591PaemRlL6JfRxGDEPl69wICngIQ3shQtzfy2gxU=
+github.com/rs/xid v1.6.0/go.mod h1:7XoLgs4eV+QndskICGsho+ADou8ySMSjJKDIan90Nz0=
 github.com/russross/blackfriday/v2 v2.0.1/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
 github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf35Ld67mk=
 github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
@@ -972,10 +974,10 @@ go.opencensus.io v0.22.0/go.mod h1:+kGneAE2xo2IficOXnaByMWTGM9T73dGwxeWcUqIpI8=
 go.opencensus.io v0.22.2/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw=
 go.opencensus.io v0.22.3/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw=
 go.opencensus.io v0.22.4/go.mod h1:yxeiOL68Rb0Xd1ddK5vPZ/oVn4vY4Ynel7k9FzqtOIw=
-go.opentelemetry.io/otel v1.29.0 h1:PdomN/Al4q/lN6iBJEN3AwPvUiHPMlt93c8bqTG5Llw=
-go.opentelemetry.io/otel v1.29.0/go.mod h1:N/WtXPs1CNCUEx+Agz5uouwCba+i+bJGFicT8SR4NP8=
-go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.29.0 h1:k6fQVDQexDE+3jG2SfCQjnHS7OamcP73YMoxEVq5B6k=
-go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.29.0/go.mod h1:t4BrYLHU450Zo9fnydWlIuswB1bm7rM8havDpWOJeDo=
+go.opentelemetry.io/otel v1.32.0 h1:WnBN+Xjcteh0zdk01SVqV55d/m62NJLJdIyb4y/WO5U=
+go.opentelemetry.io/otel v1.32.0/go.mod h1:00DCVSB0RQcnzlwyTfqtxSm+DRr9hpYrHjNGiBHVQIg=
+go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.32.0 h1:j7ZSD+5yn+lo3sGV69nW04rRR0jhYnBwjuX3r0HvnK0=
+go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.32.0/go.mod h1:WXbYJTUaZXAbYd8lbgGuvih0yuCfOFC5RJoYnoLcGz8=
 go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.29.0 h1:xvhQxJ/C9+RTnAj5DpTg7LSM1vbbMTiXt7e9hsfqHNw=
 go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.29.0/go.mod h1:Fcvs2Bz1jkDM+Wf5/ozBGmi3tQ/c9zPKLnsipnfhGAo=
 go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.28.0 h1:3Q/xZUyC1BBkualc9ROb4G8qkH90LXEIICcs5zv1OYY=
@@ -984,14 +986,14 @@ go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.27.0 h1:qFffA
 go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.27.0/go.mod h1:MOiCmryaYtc+V0Ei+Tx9o5S1ZjA7kzLucuVuyzBZloQ=
 go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.26.0 h1:1wp/gyxsuYtuE/JFxsQRtcCDtMrO2qMvlfXALU5wkzI=
 go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.26.0/go.mod h1:gbTHmghkGgqxMomVQQMur1Nba4M0MQ8AYThXDUjsJ38=
-go.opentelemetry.io/otel/metric v1.29.0 h1:vPf/HFWTNkPu1aYeIsc98l4ktOQaL6LeSoeV2g+8YLc=
-go.opentelemetry.io/otel/metric v1.29.0/go.mod h1:auu/QWieFVWx+DmQOUMgj0F8LHWdgalxXqvp7BII/W8=
-go.opentelemetry.io/otel/sdk v1.29.0 h1:vkqKjk7gwhS8VaWb0POZKmIEDimRCMsopNYnriHyryo=
-go.opentelemetry.io/otel/sdk v1.29.0/go.mod h1:pM8Dx5WKnvxLCb+8lG1PRNIDxu9g9b9g59Qr7hfAAok=
-go.opentelemetry.io/otel/sdk/metric v1.29.0 h1:K2CfmJohnRgvZ9UAj2/FhIf/okdWcNdBwe1m8xFXiSY=
-go.opentelemetry.io/otel/sdk/metric v1.29.0/go.mod h1:6zZLdCl2fkauYoZIOn/soQIDSWFmNSRcICarHfuhNJQ=
-go.opentelemetry.io/otel/trace v1.29.0 h1:J/8ZNK4XgR7a21DZUAsbF8pZ5Jcw1VhACmnYt39JTi4=
-go.opentelemetry.io/otel/trace v1.29.0/go.mod h1:eHl3w0sp3paPkYstJOmAimxhiFXPg+MMTlEh3nsQgWQ=
+go.opentelemetry.io/otel/metric v1.32.0 h1:xV2umtmNcThh2/a/aCP+h64Xx5wsj8qqnkYZktzNa0M=
+go.opentelemetry.io/otel/metric v1.32.0/go.mod h1:jH7CIbbK6SH2V2wE16W05BHCtIDzauciCRLoc/SyMv8=
+go.opentelemetry.io/otel/sdk v1.32.0 h1:RNxepc9vK59A8XsgZQouW8ue8Gkb4jpWtJm9ge5lEG4=
+go.opentelemetry.io/otel/sdk v1.32.0/go.mod h1:LqgegDBjKMmb2GC6/PrTnteJG39I8/vJCAP9LlJXEjU=
+go.opentelemetry.io/otel/sdk/metric v1.32.0 h1:rZvFnvmvawYb0alrYkjraqJq0Z4ZUJAiyYCU9snn1CU=
+go.opentelemetry.io/otel/sdk/metric v1.32.0/go.mod h1:PWeZlq0zt9YkYAp3gjKZ0eicRYvOh1Gd+X99x6GHpCQ=
+go.opentelemetry.io/otel/trace v1.32.0 h1:WIC9mYrXf8TmY/EXuULKc8hR17vE+Hjv2cssQDe03fM=
+go.opentelemetry.io/otel/trace v1.32.0/go.mod h1:+i4rkvCraA+tG6AzwloGaCtkx53Fa+L+V8e9a7YvhT8=
 go.opentelemetry.io/proto/otlp v1.3.1 h1:TrMUixzpM0yuc/znrFTP9MMRh8trP93mkCiDVeXrui0=
 go.opentelemetry.io/proto/otlp v1.3.1/go.mod h1:0X1WI4de4ZsLrrJNLAQbFeLCm3T7yBkR0XqQ7niQU+8=
 go.uber.org/atomic v1.3.2/go.mod h1:gD2HeocX3+yG+ygLZcrzQJaqmWj9AIm7n08wl/qW/PE=
@@ -1032,8 +1034,8 @@ golang.org/x/crypto v0.0.0-20201002170205-7f63de1d35b0/go.mod h1:LzIPMQfyMNhhGPh
 golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
 golang.org/x/crypto v0.5.0/go.mod h1:NK/OQwhpMQP3MwtdjgLlYHnH9ebylxKWv3e0fK+mkQU=
 golang.org/x/crypto v0.14.0/go.mod h1:MVFd36DqK4CsrnJYDkBA3VC4m2GkXAM0PvzMCn4JQf4=
-golang.org/x/crypto v0.26.0 h1:RrRspgV4mU+YwB4FYnuBoKsUapNIL5cohGAmSH3azsw=
-golang.org/x/crypto v0.26.0/go.mod h1:GY7jblb9wI+FOo5y8/S2oY4zWP07AkOJ4+jxCqdqn54=
+golang.org/x/crypto v0.31.0 h1:ihbySMvVjLAeSH1IbfcRTkD/iNscyz8rGzjF/E5hV6U=
+golang.org/x/crypto v0.31.0/go.mod h1:kDsLvtWBEx7MV9tJOj9bnXsPbxwJQ6csT/x4KIN4Ssk=
 golang.org/x/exp v0.0.0-20180321215751-8460e604b9de/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
 golang.org/x/exp v0.0.0-20180807140117-3d87b88a115f/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
 golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
@@ -1123,8 +1125,8 @@ golang.org/x/net v0.5.0/go.mod h1:DivGGAXEgPSlEBzxGzZI+ZLohi+xUj054jfeKui00ws=
 golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
 golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
 golang.org/x/net v0.17.0/go.mod h1:NxSsAGuq816PNPmqtQdLE42eU2Fs7NoRIZrHJAlaCOE=
-golang.org/x/net v0.28.0 h1:a9JDOJc5GMUJ0+UDqmLT86WiEy7iWyIhz8gz8E4e5hE=
-golang.org/x/net v0.28.0/go.mod h1:yqtgsTWOOnlGLG9GFRrK3++bGOUEkNBoHZc8MEDWPNg=
+golang.org/x/net v0.30.0 h1:AcW1SDZMkb8IpzCdQUaIq2sP4sZ4zw+55h6ynffypl4=
+golang.org/x/net v0.30.0/go.mod h1:2wGyMJ5iFasEhkwi13ChkO/t1ECNC4X4eBKkVFyYFlU=
 golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
 golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
 golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
@@ -1132,8 +1134,8 @@ golang.org/x/oauth2 v0.0.0-20191202225959-858c2ad4c8b6/go.mod h1:gOpvHmFTYa4Iltr
 golang.org/x/oauth2 v0.0.0-20200107190931-bf48bf16ab8d/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
 golang.org/x/oauth2 v0.0.0-20200902213428-5d25da1a8d43/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A=
 golang.org/x/oauth2 v0.0.0-20210514164344-f6687ab2804c/go.mod h1:KelEdhl1UZF7XfJ4dDtk6s++YSgaE7mD/BuKKDLBl4A=
-golang.org/x/oauth2 v0.22.0 h1:BzDx2FehcG7jJwgWLELCdmLuxk2i+x9UDpSiss2u0ZA=
-golang.org/x/oauth2 v0.22.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI=
+golang.org/x/oauth2 v0.23.0 h1:PbgcYx2W7i4LvjJWEbf0ngHV6qJYr86PkAV3bXdLEbs=
+golang.org/x/oauth2 v0.23.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI=
 golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20181221193216-37e7f081c4d4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
@@ -1149,8 +1151,8 @@ golang.org/x/sync v0.0.0-20201207232520-09787c993a3a/go.mod h1:RxMgew5VJxzue5/jJ
 golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
 golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
-golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ=
-golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
+golang.org/x/sync v0.10.0 h1:3NQrjDixjgGwUOCaF8w2+VYHv0Ve/vGYSbdkTa98gmQ=
+golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk=
 golang.org/x/sys v0.0.0-20180823144017-11551d06cbcc/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
 golang.org/x/sys v0.0.0-20180905080454-ebe1bf3edb33/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
@@ -1229,8 +1231,8 @@ golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
 golang.org/x/sys v0.13.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
-golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo=
-golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
+golang.org/x/sys v0.28.0 h1:Fksou7UEQUWlKvIdsqzJmUmCX3cZuD2+P3XyyzwMhlA=
+golang.org/x/sys v0.28.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
 golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
 golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
 golang.org/x/term v0.1.0/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
@@ -1238,8 +1240,8 @@ golang.org/x/term v0.4.0/go.mod h1:9P2UbLfCdcvo3p/nzKvsmas4TnlujnuoV9hGgYzW1lQ=
 golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
 golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo=
 golang.org/x/term v0.13.0/go.mod h1:LTmsnFJwVN6bCy1rVCoS+qHT1HhALEFxKncY3WNNh4U=
-golang.org/x/term v0.23.0 h1:F6D4vR+EHoL9/sWAWgAR1H2DcHr4PareCbAaCo1RpuU=
-golang.org/x/term v0.23.0/go.mod h1:DgV24QBUrK6jhZXl+20l6UWznPlwAHm1Q1mGHtydmSk=
+golang.org/x/term v0.27.0 h1:WP60Sv1nlK1T6SupCHbXzSaN0b9wUmsPoRS9b61A23Q=
+golang.org/x/term v0.27.0/go.mod h1:iMsnZpn0cago0GOrHO2+Y7u7JPn5AylBrcoWkElMTSM=
 golang.org/x/text v0.0.0-20170915032832-14c0d48ead0c/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
 golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
 golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
@@ -1253,8 +1255,8 @@ golang.org/x/text v0.6.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
 golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8=
 golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8=
 golang.org/x/text v0.13.0/go.mod h1:TvPlkZtksWOMsz7fbANvkp4WM8x/WCo/om8BMLbz+aE=
-golang.org/x/text v0.17.0 h1:XtiM5bkSOt+ewxlOE/aE/AKEHibwj/6gvWMl9Rsh0Qc=
-golang.org/x/text v0.17.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY=
+golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo=
+golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ=
 golang.org/x/time v0.0.0-20180412165947-fbb02b2291d2/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
 golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
 golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
@@ -1402,10 +1404,10 @@ google.golang.org/genproto v0.0.0-20200729003335-053ba62fc06f/go.mod h1:FWY/as6D
 google.golang.org/genproto v0.0.0-20200804131852-c06518451d9c/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=
 google.golang.org/genproto v0.0.0-20200825200019-8632dd797987/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=
 google.golang.org/genproto v0.0.0-20200904004341-0bd0a958aa1d/go.mod h1:FWY/as6DDZQgahTzZj3fqbO1CbirC29ZNUFHwi0/+no=
-google.golang.org/genproto/googleapis/api v0.0.0-20240822170219-fc7c04adadcd h1:BBOTEWLuuEGQy9n1y9MhVJ9Qt0BDu21X8qZs71/uPZo=
-google.golang.org/genproto/googleapis/api v0.0.0-20240822170219-fc7c04adadcd/go.mod h1:fO8wJzT2zbQbAjbIoos1285VfEIYKDDY+Dt+WpTkh6g=
-google.golang.org/genproto/googleapis/rpc v0.0.0-20240822170219-fc7c04adadcd h1:6TEm2ZxXoQmFWFlt1vNxvVOa1Q0dXFQD1m/rYjXmS0E=
-google.golang.org/genproto/googleapis/rpc v0.0.0-20240822170219-fc7c04adadcd/go.mod h1:UqMtugtsSgubUsoxbuAoiCXvqvErP7Gf0so0mK9tHxU=
+google.golang.org/genproto/googleapis/api v0.0.0-20241104194629-dd2ea8efbc28 h1:M0KvPgPmDZHPlbRbaNU1APr28TvwvvdUPlSv7PUvy8g=
+google.golang.org/genproto/googleapis/api v0.0.0-20241104194629-dd2ea8efbc28/go.mod h1:dguCy7UOdZhTvLzDyt15+rOrawrpM4q7DD9dQ1P11P4=
+google.golang.org/genproto/googleapis/rpc v0.0.0-20241104194629-dd2ea8efbc28 h1:XVhgTWWV3kGQlwJHR3upFWZeTsei6Oks1apkZSeonIE=
+google.golang.org/genproto/googleapis/rpc v0.0.0-20241104194629-dd2ea8efbc28/go.mod h1:GX3210XPVPUjJbTUbvwI8f2IpZDMZuPJWDzDuebbviI=
 google.golang.org/grpc v1.17.0/go.mod h1:6QZJwpn2B+Zp71q/5VxRsJ6NXXVCE5NRUHRo+f3cWCs=
 google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c=
 google.golang.org/grpc v1.20.0/go.mod h1:chYK+tFQF0nDUGJgXMSgLCQk3phJEuONr2DCgLDdAQM=
diff --git a/vendor/gopkg.in/ini.v1/.editorconfig b/vendor/github.com/go-ini/ini/.editorconfig
similarity index 100%
rename from vendor/gopkg.in/ini.v1/.editorconfig
rename to vendor/github.com/go-ini/ini/.editorconfig
diff --git a/vendor/gopkg.in/ini.v1/.gitignore b/vendor/github.com/go-ini/ini/.gitignore
similarity index 100%
rename from vendor/gopkg.in/ini.v1/.gitignore
rename to vendor/github.com/go-ini/ini/.gitignore
diff --git a/vendor/gopkg.in/ini.v1/.golangci.yml b/vendor/github.com/go-ini/ini/.golangci.yml
similarity index 100%
rename from vendor/gopkg.in/ini.v1/.golangci.yml
rename to vendor/github.com/go-ini/ini/.golangci.yml
diff --git a/vendor/gopkg.in/ini.v1/LICENSE b/vendor/github.com/go-ini/ini/LICENSE
similarity index 100%
rename from vendor/gopkg.in/ini.v1/LICENSE
rename to vendor/github.com/go-ini/ini/LICENSE
diff --git a/vendor/gopkg.in/ini.v1/Makefile b/vendor/github.com/go-ini/ini/Makefile
similarity index 100%
rename from vendor/gopkg.in/ini.v1/Makefile
rename to vendor/github.com/go-ini/ini/Makefile
diff --git a/vendor/gopkg.in/ini.v1/README.md b/vendor/github.com/go-ini/ini/README.md
similarity index 100%
rename from vendor/gopkg.in/ini.v1/README.md
rename to vendor/github.com/go-ini/ini/README.md
diff --git a/vendor/gopkg.in/ini.v1/codecov.yml b/vendor/github.com/go-ini/ini/codecov.yml
similarity index 100%
rename from vendor/gopkg.in/ini.v1/codecov.yml
rename to vendor/github.com/go-ini/ini/codecov.yml
diff --git a/vendor/gopkg.in/ini.v1/data_source.go b/vendor/github.com/go-ini/ini/data_source.go
similarity index 100%
rename from vendor/gopkg.in/ini.v1/data_source.go
rename to vendor/github.com/go-ini/ini/data_source.go
diff --git a/vendor/gopkg.in/ini.v1/deprecated.go b/vendor/github.com/go-ini/ini/deprecated.go
similarity index 100%
rename from vendor/gopkg.in/ini.v1/deprecated.go
rename to vendor/github.com/go-ini/ini/deprecated.go
diff --git a/vendor/gopkg.in/ini.v1/error.go b/vendor/github.com/go-ini/ini/error.go
similarity index 100%
rename from vendor/gopkg.in/ini.v1/error.go
rename to vendor/github.com/go-ini/ini/error.go
diff --git a/vendor/gopkg.in/ini.v1/file.go b/vendor/github.com/go-ini/ini/file.go
similarity index 100%
rename from vendor/gopkg.in/ini.v1/file.go
rename to vendor/github.com/go-ini/ini/file.go
diff --git a/vendor/gopkg.in/ini.v1/helper.go b/vendor/github.com/go-ini/ini/helper.go
similarity index 100%
rename from vendor/gopkg.in/ini.v1/helper.go
rename to vendor/github.com/go-ini/ini/helper.go
diff --git a/vendor/gopkg.in/ini.v1/ini.go b/vendor/github.com/go-ini/ini/ini.go
similarity index 100%
rename from vendor/gopkg.in/ini.v1/ini.go
rename to vendor/github.com/go-ini/ini/ini.go
diff --git a/vendor/gopkg.in/ini.v1/key.go b/vendor/github.com/go-ini/ini/key.go
similarity index 100%
rename from vendor/gopkg.in/ini.v1/key.go
rename to vendor/github.com/go-ini/ini/key.go
diff --git a/vendor/gopkg.in/ini.v1/parser.go b/vendor/github.com/go-ini/ini/parser.go
similarity index 100%
rename from vendor/gopkg.in/ini.v1/parser.go
rename to vendor/github.com/go-ini/ini/parser.go
diff --git a/vendor/gopkg.in/ini.v1/section.go b/vendor/github.com/go-ini/ini/section.go
similarity index 100%
rename from vendor/gopkg.in/ini.v1/section.go
rename to vendor/github.com/go-ini/ini/section.go
diff --git a/vendor/gopkg.in/ini.v1/struct.go b/vendor/github.com/go-ini/ini/struct.go
similarity index 100%
rename from vendor/gopkg.in/ini.v1/struct.go
rename to vendor/github.com/go-ini/ini/struct.go
diff --git a/vendor/github.com/goccy/go-json/.codecov.yml b/vendor/github.com/goccy/go-json/.codecov.yml
new file mode 100644
index 00000000..e9813457
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/.codecov.yml
@@ -0,0 +1,32 @@
+codecov:
+  require_ci_to_pass: yes
+
+coverage:
+  precision: 2
+  round: down
+  range: "70...100"
+
+  status:
+    project:
+      default:
+        target: 70%
+        threshold: 2%
+    patch: off
+    changes: no
+
+parsers:
+  gcov:
+    branch_detection:
+      conditional: yes
+      loop: yes
+      method: no
+      macro: no
+
+comment:
+  layout: "header,diff"
+  behavior: default
+  require_changes: no
+
+ignore:
+  - internal/encoder/vm_color
+  - internal/encoder/vm_color_indent
diff --git a/vendor/github.com/goccy/go-json/.gitignore b/vendor/github.com/goccy/go-json/.gitignore
new file mode 100644
index 00000000..37828382
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/.gitignore
@@ -0,0 +1,2 @@
+cover.html
+cover.out
diff --git a/vendor/github.com/goccy/go-json/.golangci.yml b/vendor/github.com/goccy/go-json/.golangci.yml
new file mode 100644
index 00000000..977accaa
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/.golangci.yml
@@ -0,0 +1,86 @@
+run:
+  skip-files:
+    - encode_optype.go
+    - ".*_test\\.go$"
+
+linters-settings:
+  govet:
+    enable-all: true
+    disable:
+      - shadow
+
+linters:
+  enable-all: true
+  disable:
+    - dogsled
+    - dupl
+    - exhaustive
+    - exhaustivestruct
+    - errorlint
+    - forbidigo
+    - funlen
+    - gci
+    - gochecknoglobals
+    - gochecknoinits
+    - gocognit
+    - gocritic
+    - gocyclo
+    - godot
+    - godox
+    - goerr113
+    - gofumpt
+    - gomnd
+    - gosec
+    - ifshort
+    - lll
+    - makezero
+    - nakedret
+    - nestif
+    - nlreturn
+    - paralleltest
+    - testpackage
+    - thelper
+    - wrapcheck
+    - interfacer
+    - lll
+    - nakedret
+    - nestif
+    - nlreturn
+    - testpackage
+    - wsl
+    - varnamelen
+    - nilnil
+    - ireturn
+    - govet
+    - forcetypeassert
+    - cyclop
+    - containedctx
+    - revive
+    - nosnakecase
+    - exhaustruct
+    - depguard
+
+issues:
+  exclude-rules:
+    # not needed
+    - path: /*.go
+      text: "ST1003: should not use underscores in package names"
+      linters:
+        - stylecheck
+    - path: /*.go
+      text: "don't use an underscore in package name"
+      linters:
+        - golint
+    - path: rtype.go
+      linters:
+        - golint
+        - stylecheck
+    - path: error.go
+      linters:
+        - staticcheck
+
+  # Maximum issues count per one linter. Set to 0 to disable. Default is 50.
+  max-issues-per-linter: 0
+
+  # Maximum count of issues with the same text. Set to 0 to disable. Default is 3.
+  max-same-issues: 0
diff --git a/vendor/github.com/goccy/go-json/CHANGELOG.md b/vendor/github.com/goccy/go-json/CHANGELOG.md
new file mode 100644
index 00000000..d09bb89c
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/CHANGELOG.md
@@ -0,0 +1,425 @@
+# v0.10.2 - 2023/03/20
+
+### New features
+
+* Support DebugDOT option for debugging encoder ( #440 )
+
+### Fix bugs
+
+* Fix combination of embedding structure and omitempty option ( #442 )
+
+# v0.10.1 - 2023/03/13
+
+### Fix bugs
+
+* Fix checkptr error for array decoder ( #415 )
+* Fix added buffer size check when decoding key ( #430 )
+* Fix handling of anonymous fields other than struct ( #431 )
+* Fix to not optimize when lower conversion can't handle byte-by-byte ( #432 )
+* Fix a problem that MarshalIndent does not work when UnorderedMap is specified ( #435 )
+* Fix mapDecoder.DecodeStream() for empty objects containing whitespace ( #425 )
+* Fix an issue that could not set the correct NextField for fields in the embedded structure ( #438 )
+
+# v0.10.0 - 2022/11/29
+
+### New features
+
+* Support JSON Path ( #250 )
+
+### Fix bugs
+
+* Fix marshaler for map's key ( #409 )
+
+# v0.9.11 - 2022/08/18
+
+### Fix bugs
+
+* Fix unexpected behavior when buffer ends with backslash ( #383 )
+* Fix stream decoding of escaped character ( #387 )
+
+# v0.9.10 - 2022/07/15
+
+### Fix bugs
+
+* Fix boundary exception of type caching ( #382 )
+
+# v0.9.9 - 2022/07/15
+
+### Fix bugs
+
+* Fix encoding of directed interface with typed nil ( #377 )
+* Fix embedded primitive type encoding using alias ( #378 )
+* Fix slice/array type encoding with types implementing MarshalJSON ( #379 )
+* Fix unicode decoding when the expected buffer state is not met after reading ( #380 )
+
+# v0.9.8 - 2022/06/30
+
+### Fix bugs
+
+* Fix decoding of surrogate-pair ( #365 )
+* Fix handling of embedded primitive type ( #366 )
+* Add validation of escape sequence for decoder ( #367 )
+* Fix stream tokenizing respecting UseNumber ( #369 )
+* Fix encoding when struct pointer type that implements Marshal JSON is embedded ( #375 )
+
+### Improve performance
+
+* Improve performance of linkRecursiveCode ( #368 )
+
+# v0.9.7 - 2022/04/22
+
+### Fix bugs
+
+#### Encoder
+
+* Add filtering process for encoding on slow path ( #355 )
+* Fix encoding of interface{} with pointer type ( #363 )
+
+#### Decoder
+
+* Fix map key decoder that implements UnmarshalJSON ( #353 )
+* Fix decoding of []uint8 type ( #361 )
+
+### New features
+
+* Add DebugWith option for encoder ( #356 )
+
+# v0.9.6 - 2022/03/22
+
+### Fix bugs
+
+* Correct the handling of the minimum value of int type for decoder ( #344 )
+* Fix bugs of stream decoder's bufferSize ( #349 )
+* Add a guard to use typeptr more safely ( #351 )
+
+### Improve decoder performance
+
+* Improve escapeString's performance ( #345 )
+
+### Others
+
+* Update go version for CI ( #347 )
+
+# v0.9.5 - 2022/03/04
+
+### Fix bugs
+
+* Fix panic when decoding time.Time with context ( #328 )
+* Fix reading the next character in buffer to nul consideration ( #338 )
+* Fix incorrect handling on skipValue ( #341 )
+
+### Improve decoder performance
+
+* Improve performance when a payload contains escape sequence ( #334 )
+
+# v0.9.4 - 2022/01/21
+
+* Fix IsNilForMarshaler for string type with omitempty ( #323 )
+* Fix the case where the embedded field is at the end ( #326 )
+
+# v0.9.3 - 2022/01/14
+
+* Fix logic of removing struct field for decoder ( #322 )
+
+# v0.9.2 - 2022/01/14
+
+* Add invalid decoder to delay type error judgment at decode ( #321 )
+
+# v0.9.1 - 2022/01/11
+
+* Fix encoding of MarshalText/MarshalJSON operation with head offset ( #319 )
+
+# v0.9.0 - 2022/01/05
+
+### New feature
+
+* Supports dynamic filtering of struct fields ( #314 )
+
+### Improve encoding performance
+
+* Improve map encoding performance ( #310 )
+* Optimize encoding path for escaped string ( #311 )
+* Add encoding option for performance ( #312 )
+
+### Fix bugs
+
+* Fix panic at encoding map value on 1.18 ( #310 )
+* Fix MarshalIndent for interface type ( #317 )
+
+# v0.8.1 - 2021/12/05
+
+* Fix operation conversion from PtrHead to Head in Recursive type ( #305 )
+
+# v0.8.0 - 2021/12/02
+
+* Fix embedded field conflict behavior ( #300 )
+* Refactor compiler for encoder ( #301 #302 )
+
+# v0.7.10 - 2021/10/16
+
+* Fix conversion from pointer to uint64  ( #294 )
+
+# v0.7.9 - 2021/09/28
+
+* Fix encoding of nil value about interface type that has method ( #291 )
+
+# v0.7.8 - 2021/09/01
+
+* Fix mapassign_faststr for indirect struct type ( #283 )
+* Fix encoding of not empty interface type ( #284 )
+* Fix encoding of empty struct interface type ( #286 )
+
+# v0.7.7 - 2021/08/25
+
+* Fix invalid utf8 on stream decoder ( #279 )
+* Fix buffer length bug on string stream decoder ( #280 )
+
+Thank you @orisano !!
+
+# v0.7.6 - 2021/08/13
+
+* Fix nil slice assignment ( #276 )
+* Improve error message ( #277 )
+
+# v0.7.5 - 2021/08/12
+
+* Fix encoding of embedded struct with tags ( #265 )
+* Fix encoding of embedded struct that isn't first field ( #272 )
+* Fix decoding of binary type with escaped char ( #273 )
+
+# v0.7.4 - 2021/07/06
+
+* Fix encoding of indirect layout structure ( #264 )
+
+# v0.7.3 - 2021/06/29
+
+* Fix encoding of pointer type in empty interface ( #262 )
+
+# v0.7.2 - 2021/06/26
+
+### Fix decoder
+
+* Add decoder for func type to fix decoding of nil function value ( #257 )
+* Fix stream decoding of []byte type ( #258 )
+
+### Performance
+
+* Improve decoding performance of map[string]interface{} type ( use `mapassign_faststr` ) ( #256 )
+* Improve encoding performance of empty interface type ( remove recursive calling of `vm.Run` ) ( #259 )
+
+### Benchmark
+
+* Add bytedance/sonic as benchmark target ( #254 )
+
+# v0.7.1 - 2021/06/18
+
+### Fix decoder
+
+* Fix error when unmarshal empty array ( #253 )
+
+# v0.7.0 - 2021/06/12
+
+### Support context for MarshalJSON and UnmarshalJSON ( #248 )
+
+* json.MarshalContext(context.Context, interface{}, ...json.EncodeOption) ([]byte, error)
+* json.NewEncoder(io.Writer).EncodeContext(context.Context, interface{}, ...json.EncodeOption) error
+* json.UnmarshalContext(context.Context, []byte, interface{}, ...json.DecodeOption) error
+* json.NewDecoder(io.Reader).DecodeContext(context.Context, interface{}) error
+
+```go
+type MarshalerContext interface {
+  MarshalJSON(context.Context) ([]byte, error)
+}
+
+type UnmarshalerContext interface {
+  UnmarshalJSON(context.Context, []byte) error
+}
+```
+
+### Add DecodeFieldPriorityFirstWin option ( #242 )
+
+In the default behavior, go-json, like encoding/json, will reflect the result of the last evaluation when a field with the same name exists. I've added new options to allow you to change this behavior. `json.DecodeFieldPriorityFirstWin` option reflects the result of the first evaluation if a field with the same name exists. This behavior has a performance advantage as it allows the subsequent strings to be skipped if all fields have been evaluated.
+
+### Fix encoder
+
+* Fix indent number contains recursive type ( #249 )
+* Fix encoding of using empty interface as map key ( #244 )
+
+### Fix decoder
+
+* Fix decoding fields containing escaped characters ( #237 )
+
+### Refactor
+
+* Move some tests to subdirectory ( #243 )
+* Refactor package layout for decoder ( #238 )
+
+# v0.6.1 - 2021/06/02
+
+### Fix encoder
+
+* Fix value of totalLength for encoding ( #236 )
+
+# v0.6.0 - 2021/06/01
+
+### Support Colorize option for encoding (#233)
+
+```go
+b, err := json.MarshalWithOption(v, json.Colorize(json.DefaultColorScheme))
+if err != nil {
+  ...
+}
+fmt.Println(string(b)) // print colored json
+```
+
+### Refactor
+
+* Fix opcode layout - Adjust memory layout of the opcode to 128 bytes in a 64-bit environment ( #230 )
+* Refactor encode option ( #231 )
+* Refactor escape string ( #232 )
+
+# v0.5.1 - 2021/5/20
+
+### Optimization
+
+* Add type addrShift to enable bigger encoder/decoder cache ( #213 )
+
+### Fix decoder
+
+* Keep original reference of slice element ( #229 )
+
+### Refactor
+
+* Refactor Debug mode for encoding ( #226 )
+* Generate VM sources for encoding ( #227 )
+* Refactor validator for null/true/false for decoding ( #221 )
+
+# v0.5.0 - 2021/5/9
+
+### Supports using omitempty and string tags at the same time ( #216 )
+
+### Fix decoder
+
+* Fix stream decoder for unicode char ( #215 )
+* Fix decoding of slice element ( #219 )
+* Fix calculating of buffer length for stream decoder ( #220 )
+
+### Refactor
+
+* replace skipWhiteSpace goto by loop ( #212 )
+
+# v0.4.14 - 2021/5/4
+
+### Benchmark
+
+* Add valyala/fastjson to benchmark ( #193 )
+* Add benchmark task for CI ( #211 )
+
+### Fix decoder
+
+* Fix decoding of slice with unmarshal json type ( #198 )
+* Fix decoding of null value for interface type that does not implement Unmarshaler ( #205 )
+* Fix decoding of null value to []byte by json.Unmarshal ( #206 )
+* Fix decoding of backslash char at the end of string ( #207 )
+* Fix stream decoder for null/true/false value ( #208 )
+* Fix stream decoder for slow reader ( #211 )
+
+### Performance
+
+* If cap of slice is enough, reuse slice data for compatibility with encoding/json ( #200 )
+
+# v0.4.13 - 2021/4/20
+
+### Fix json.Compact and json.Indent
+
+* Support validation the input buffer for json.Compact and json.Indent ( #189 )
+* Optimize json.Compact and json.Indent ( improve memory footprint ) ( #190 )
+
+# v0.4.12 - 2021/4/15
+
+### Fix encoder
+
+* Fix unnecessary indent for empty slice type ( #181 )
+* Fix encoding of omitempty feature for the slice or interface type ( #183 )
+* Fix encoding custom types zero values with omitempty when marshaller exists ( #187 )
+
+### Fix decoder
+
+* Fix decoder for invalid top level value ( #184 )
+* Fix decoder for invalid number value ( #185 )
+
+# v0.4.11 - 2021/4/3
+
+* Improve decoder performance for interface type
+
+# v0.4.10 - 2021/4/2
+
+### Fix encoder
+
+* Fixed a bug when encoding slice and map containing recursive structures
+* Fixed a logic to determine if indirect reference
+
+# v0.4.9 - 2021/3/29
+
+### Add debug mode
+
+If you use `json.MarshalWithOption(v, json.Debug())` and `panic` occurred in `go-json`, produces debug information to console.
+
+### Support a new feature to compatible with encoding/json
+
+- invalid UTF-8 is coerced to valid UTF-8 ( without performance down )
+
+### Fix encoder
+
+- Fixed handling of MarshalJSON of function type
+
+### Fix decoding of slice of pointer type
+
+If there is a pointer value, go-json will use it. (This behavior is necessary to achieve the ability to prioritize pre-filled values). However, since slices are reused internally, there was a bug that referred to the previous pointer value. Therefore, it is not necessary to refer to the pointer value in advance for the slice element, so we explicitly initialize slice element by `nil`.
+
+# v0.4.8 - 2021/3/21
+
+### Reduce memory usage at compile time
+
+* go-json used to need about 2GB of memory at compile time, but now it can compile with less than about 550MB.
+
+### Fix any encoder's bug
+
+* Add many test cases for encoder
+* Fix composite type ( slice/array/map )
+* Fix pointer types
+* Fix encoding of MarshalJSON or MarshalText or json.Number type
+
+### Refactor encoder
+
+* Change package layout for reducing memory usage at compile
+* Remove anonymous and only operation
+* Remove root property from encodeCompileContext and opcode
+
+### Fix CI
+
+* Add Go 1.16
+* Remove Go 1.13
+* Fix `make cover` task
+
+### Number/Delim/Token/RawMessage use the types defined in encoding/json by type alias
+
+# v0.4.7 - 2021/02/22
+
+### Fix decoder
+
+* Fix decoding of deep recursive structure
+* Fix decoding of embedded unexported pointer field
+* Fix invalid test case
+* Fix decoding of invalid value
+* Fix decoding of prefilled value
+* Fix not being able to return UnmarshalTypeError when it should be returned
+* Fix decoding of null value
+* Fix decoding of type of null string
+* Use the pre-allocated pointer at decoding if it exists
+
+### Reduce memory usage at compile
+
+* Integrate int/int8/int16/int32/int64 and uint/uint8/uint16/uint32/uint64 operation to reduce memory usage at compile
+
+### Remove unnecessary optype
diff --git a/vendor/github.com/goccy/go-json/LICENSE b/vendor/github.com/goccy/go-json/LICENSE
new file mode 100644
index 00000000..6449c8bf
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2020 Masaaki Goshima
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/vendor/github.com/goccy/go-json/Makefile b/vendor/github.com/goccy/go-json/Makefile
new file mode 100644
index 00000000..c030577d
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/Makefile
@@ -0,0 +1,39 @@
+PKG := github.com/goccy/go-json
+
+BIN_DIR := $(CURDIR)/bin
+PKGS := $(shell go list ./... | grep -v internal/cmd|grep -v test)
+COVER_PKGS := $(foreach pkg,$(PKGS),$(subst $(PKG),.,$(pkg)))
+
+COMMA := ,
+EMPTY :=
+SPACE := $(EMPTY) $(EMPTY)
+COVERPKG_OPT := $(subst $(SPACE),$(COMMA),$(COVER_PKGS))
+
+$(BIN_DIR):
+	@mkdir -p $(BIN_DIR)
+
+.PHONY: cover
+cover:
+	go test -coverpkg=$(COVERPKG_OPT) -coverprofile=cover.out ./...
+
+.PHONY: cover-html
+cover-html: cover
+	go tool cover -html=cover.out
+
+.PHONY: lint
+lint: golangci-lint
+	$(BIN_DIR)/golangci-lint run
+
+golangci-lint: | $(BIN_DIR)
+	@{ \
+		set -e; \
+		GOLANGCI_LINT_TMP_DIR=$$(mktemp -d); \
+		cd $$GOLANGCI_LINT_TMP_DIR; \
+		go mod init tmp; \
+		GOBIN=$(BIN_DIR) go install github.com/golangci/golangci-lint/cmd/golangci-lint@v1.54.2; \
+		rm -rf $$GOLANGCI_LINT_TMP_DIR; \
+	}
+
+.PHONY: generate
+generate:
+	go generate ./internal/...
diff --git a/vendor/github.com/goccy/go-json/README.md b/vendor/github.com/goccy/go-json/README.md
new file mode 100644
index 00000000..7bacc54f
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/README.md
@@ -0,0 +1,529 @@
+# go-json
+
+![Go](https://github.com/goccy/go-json/workflows/Go/badge.svg)
+[![GoDoc](https://godoc.org/github.com/goccy/go-json?status.svg)](https://pkg.go.dev/github.com/goccy/go-json?tab=doc)
+[![codecov](https://codecov.io/gh/goccy/go-json/branch/master/graph/badge.svg)](https://codecov.io/gh/goccy/go-json)
+
+Fast JSON encoder/decoder compatible with encoding/json for Go
+
+<img width="400px" src="https://user-images.githubusercontent.com/209884/92572337-42b42900-f2bf-11ea-973a-c74a359553a5.png"></img>
+
+# Roadmap
+
+```
+* version ( expected release date )
+
+* v0.9.0
+ |
+ | while maintaining compatibility with encoding/json, we will add convenient APIs
+ |
+ v
+* v1.0.0
+```
+
+We are accepting requests for features that will be implemented between v0.9.0 and v1.0.0.
+If you have the API you need, please submit your issue [here](https://github.com/goccy/go-json/issues).
+
+# Features
+
+- Drop-in replacement of `encoding/json`
+- Fast ( See [Benchmark section](https://github.com/goccy/go-json#benchmarks) )
+- Flexible customization with options
+- Coloring the encoded string
+- Can propagate context.Context to `MarshalJSON` or `UnmarshalJSON`
+- Can dynamically filter the fields of the structure type-safely
+
+# Installation
+
+```
+go get github.com/goccy/go-json
+```
+
+# How to use
+
+Replace import statement from `encoding/json` to `github.com/goccy/go-json`
+
+```
+-import "encoding/json"
++import "github.com/goccy/go-json"
+```
+
+# JSON library comparison
+
+|  name  |  encoder | decoder | compatible with `encoding/json` |
+| :----: | :------: | :-----: | :-----------------------------: |
+| encoding/json |  yes | yes | N/A |
+| [json-iterator/go](https://github.com/json-iterator/go) | yes | yes | partial |
+| [easyjson](https://github.com/mailru/easyjson) | yes | yes |  no |
+| [gojay](https://github.com/francoispqt/gojay) | yes | yes |  no |
+| [segmentio/encoding/json](https://github.com/segmentio/encoding/tree/master/json) | yes | yes | partial |
+| [jettison](https://github.com/wI2L/jettison) | yes | no | no |
+| [simdjson-go](https://github.com/minio/simdjson-go) | no | yes | no |
+| goccy/go-json | yes | yes | yes |
+
+- `json-iterator/go` isn't compatible with `encoding/json` in many ways (e.g. https://github.com/json-iterator/go/issues/229 ), but it hasn't been supported for a long time.
+- `segmentio/encoding/json` is well supported for encoders, but some are not supported for decoder APIs such as `Token` ( streaming decode )
+
+## Other libraries
+
+- [jingo](https://github.com/bet365/jingo)
+
+I tried the benchmark but it didn't work.
+Also, it seems to panic when it receives an unexpected value because there is no error handling...
+
+- [ffjson](https://github.com/pquerna/ffjson)
+
+Benchmarking gave very slow results.
+It seems that it is assumed that the user will use the buffer pool properly.
+Also, development seems to have already stopped
+
+# Benchmarks
+
+```
+$ cd benchmarks
+$ go test -bench .
+```
+
+## Encode
+
+<img width="700px" src="https://user-images.githubusercontent.com/209884/107126758-0845cb00-68f5-11eb-8db7-086fcf9bcfaa.png"></img>
+<img width="700px" src="https://user-images.githubusercontent.com/209884/107126757-07ad3480-68f5-11eb-87aa-858cc5eacfcb.png"></img>
+
+## Decode
+
+<img width="700" alt="" src="https://user-images.githubusercontent.com/209884/107979944-bd1d6d80-7002-11eb-944b-9d17b6674e3f.png">
+<img width="700" alt="" src="https://user-images.githubusercontent.com/209884/107979931-b989e680-7002-11eb-87a0-66fc22d90dd4.png">
+<img width="700" alt="" src="https://user-images.githubusercontent.com/209884/107979940-bc84d700-7002-11eb-9647-869bbc25c9d9.png">
+
+
+# Fuzzing
+
+[go-json-fuzz](https://github.com/goccy/go-json-fuzz) is the repository for fuzzing tests.
+If you run the test in this repository and find a bug, please commit to corpus to go-json-fuzz and report the issue to [go-json](https://github.com/goccy/go-json/issues).
+
+# How it works
+
+`go-json` is very fast in both encoding and decoding compared to other libraries.
+It would be easier to achieve performance by using automatic code generation or a dedicated interface, but `go-json` dares to stick to compatibility with `encoding/json` and its simple interface. Despite this, we are developing with the aim of being the fastest library.
+
+Here, we explain the various speed-up techniques implemented by `go-json`.
+
+## Basic technique
+
+The techniques listed here are the ones used by most of the libraries listed above.
+
+### Buffer reuse
+
+Since the only value required for the result of `json.Marshal(interface{}) ([]byte, error)` is `[]byte`, the only value that must be allocated during encoding is the return value `[]byte` .
+
+Also, as the number of allocations increases, the performance will be affected, so the number of allocations should be kept as low as possible when creating `[]byte`.
+
+Therefore, there is a technique to reduce the number of times a new buffer must be allocated by reusing the buffer used for the previous encoding by using `sync.Pool`.
+
+Finally, if you allocate a buffer that is exactly as long as the resulting data and copy the contents into it, you only need to allocate the buffer once in theory.
+
+```go
+type buffer struct {
+    data []byte
+}
+
+var bufPool = sync.Pool{
+    New: func() interface{} {
+        return &buffer{data: make([]byte, 0, 1024)}
+    },
+}
+
+buf := bufPool.Get().(*buffer)
+data := encode(buf.data) // reuse buf.data
+
+newBuf := make([]byte, len(data))
+copy(newBuf, data)
+
+buf.data = data
+bufPool.Put(buf)
+```
+
+### Elimination of reflection
+
+As you know, the reflection operation is very slow.
+
+Therefore, using the fact that the address position where the type information is stored is fixed for each binary ( we call this `typeptr` ),
+we can use the address in the type information to call a pre-built optimized process.
+
+For example, you can get the address to the type information from `interface{}` as follows and you can use that information to call a process that does not have reflection.
+
+To process without reflection, pass a pointer (`unsafe.Pointer`) to where the value is stored.
+
+```go
+
+type emptyInterface struct {
+    typ unsafe.Pointer
+    ptr unsafe.Pointer
+}
+
+var typeToEncoder = map[uintptr]func(unsafe.Pointer)([]byte, error){}
+
+func Marshal(v interface{}) ([]byte, error) {
+    iface := (*emptyInterface)(unsafe.Pointer(&v))
+    typeptr := uintptr(iface.typ)
+    if enc, exists := typeToEncoder[typeptr]; exists {
+        return enc(iface.ptr)
+    }
+    ...
+}
+```
+
+※ In reality, `typeToEncoder` can be referenced by multiple goroutines, so exclusive control is required.
+
+## Unique speed-up technique
+
+## Encoder
+
+### Do not escape arguments of `Marshal`
+
+`json.Marshal` and `json.Unmarshal` receive `interface{}` value and they perform type determination dynamically to process.
+Normally, you need to use the `reflect` library to determine the type dynamically, but since `reflect.Type` is defined as an `interface`, the argument passed to `reflect` escapes whenever you call a method of `reflect.Type`.
+
+Therefore, the arguments for `Marshal` and `Unmarshal` are always escaped to the heap.
+However, `go-json` can use the feature of `reflect.Type` while avoiding escaping.
+
+`reflect.Type` is defined as `interface`, but in reality `reflect.Type` is implemented only by the structure `rtype` defined in the `reflect` package.
+For this reason, to date `reflect.Type` is the same as `*reflect.rtype`.
+
+Therefore, by directly handling `*reflect.rtype`, which is an implementation of `reflect.Type`, it is possible to avoid escaping because it changes from `interface` to using `struct`.
+
+The technique for working with `*reflect.rtype` directly from `go-json` is implemented at [rtype.go](https://github.com/goccy/go-json/blob/master/internal/runtime/rtype.go)
+
+Also, the same technique is cut out as a library ( https://github.com/goccy/go-reflect )
+
+Initially this feature was the default behavior of `go-json`.
+But after careful testing, I found that when a large value was passed to `json.Marshal()` and the argument could not be allocated on the stack, it could not be properly escaped to the heap (a bug in the Go compiler).
+
+Therefore, this feature will be provided as an **optional** until this issue is resolved.
+
+To use it, add `NoEscape` like `MarshalNoEscape()`
+
+### Encoding using opcode sequence
+
+I explained that you can use `typeptr` to call a pre-built process from type information.
+
+In other libraries, this dedicated process is implemented as a function call, such as an anonymous function, but function calls are inherently slow and should be avoided as much as possible.
+
+Therefore, `go-json` adopted the Instruction-based execution processing system, which is also used to implement virtual machines for programming language.
+
+If it is the first type to encode, create the opcode ( instruction ) sequence required for encoding.
+From the second time onward, use `typeptr` to get the cached pre-built opcode sequence and encode it based on it. An example of the opcode sequence is shown below.
+
+```go
+json.Marshal(struct{
+    X int `json:"x"`
+    Y string `json:"y"`
+}{X: 1, Y: "hello"})
+```
+
+When encoding a structure like the one above, create a sequence of opcodes like this:
+
+```
+- opStructFieldHead ( `{` )
+- opStructFieldInt ( `"x": 1,` )
+- opStructFieldString ( `"y": "hello"` )
+- opStructEnd ( `}` )
+- opEnd
+```
+
+※ When processing each operation, write the letters on the right.
+
+In addition, each opcode is managed by the following structure ( 
+Pseudo code ).
+
+```go
+type opType int
+const (
+    opStructFieldHead opType = iota
+    opStructFieldInt
+    opStructFieldString
+    opStructEnd
+    opEnd
+)
+type opcode struct {
+    op opType
+    key []byte
+    next *opcode
+}
+```
+
+The process of encoding using the opcode sequence is roughly implemented as follows.
+
+```go
+func encode(code *opcode, b []byte, p unsafe.Pointer) ([]byte, error) {
+    for {
+        switch code.op {
+        case opStructFieldHead:
+            b = append(b, '{')
+            code = code.next
+        case opStructFieldInt:
+            b = append(b, code.key...)
+            b = appendInt((*int)(unsafe.Pointer(uintptr(p)+code.offset)))
+            code = code.next
+        case opStructFieldString:
+            b = append(b, code.key...)
+            b = appendString((*string)(unsafe.Pointer(uintptr(p)+code.offset)))
+            code = code.next
+        case opStructEnd:
+            b = append(b, '}')
+            code = code.next
+        case opEnd:
+            goto END
+        }
+    }
+END:
+    return b, nil
+}
+```
+
+In this way, the huge `switch-case` is used to encode by manipulating the linked list opcodes to avoid unnecessary function calls.
+
+### Opcode sequence optimization
+
+One of the advantages of encoding using the opcode sequence is the ease of optimization.
+The opcode sequence mentioned above is actually converted into the following optimized operations and used.
+
+```
+- opStructFieldHeadInt ( `{"x": 1,` )
+- opStructEndString ( `"y": "hello"}` )
+- opEnd
+```
+
+It has been reduced from 5 opcodes to 3 opcodes !
+Reducing the number of opcodes means reducing the number of branches with `switch-case`.
+In other words, the closer the number of operations is to 1, the faster the processing can be performed.
+
+In `go-json`, the number of opcodes itself is reduced as shown above, and encoding is sped up further by preparing opcodes with optimized paths.
+
+### Change recursive call from CALL to JMP
+
+Recursive processing is required during encoding if the type is defined recursively as follows:
+
+```go
+type T struct {
+    X int
+    U *U
+}
+
+type U struct {
+    T *T
+}
+
+b, err := json.Marshal(&T{
+    X: 1,
+    U: &U{
+        T: &T{
+            X: 2,
+        },
+    },
+})
+fmt.Println(string(b)) // {"X":1,"U":{"T":{"X":2,"U":null}}}
+```
+
+In `go-json`, recursive processing is processed by the operation type of `opStructFieldRecursive`.
+
+In this operation, after acquiring the opcode sequence used for recursive processing, the function is **not** called recursively as it is; instead, the necessary values are saved by the operation itself and the recursion is implemented by moving to the next operation.
+
+The technique of implementing recursive processing with the `JMP` operation while avoiding the `CALL` operation is a famous technique for implementing a high-speed virtual machine.
+
+For more details, please refer to [the article](https://engineering.mercari.com/blog/entry/1599563768-081104c850) ( but Japanese only ).
+
+### Dispatch by typeptr from map to slice
+
+When retrieving the data cached from the type information by `typeptr`, we usually use map.
+Map requires exclusive control, so use `sync.Map` for a naive implementation.
+
+However, this is slow, so it's a good idea to use the `atomic` package for exclusive control as implemented by `segmentio/encoding/json` ( https://github.com/segmentio/encoding/blob/master/json/codec.go#L41-L55 ).
+
+This implementation slows down the set instead of speeding up the get, but it works well because of the nature of the library, it encodes much more for the same type.
+
+However, as a result of profiling, I noticed that `runtime.mapaccess2` accounts for a significant percentage of the execution time. So I wondered whether I could change the lookup from a map to a slice.
+
+There is an API named `typelinks` defined in the `runtime` package that the `reflect` package uses internally.
+This allows you to get all the type information defined in the binary at runtime.
+
+The fact that all type information can be acquired means that by constructing slices in advance with the acquired total number of type information, it is possible to look up with the value of `typeptr` without worrying about out-of-range access.
+
+However, if there is too much type information, it will use a lot of memory, so by default we will only use this optimization if the slice size fits within **2MiB** .
+
+If this approach is not available, it will fall back to the `atomic` based process described above.
+
+If you want to know more, please refer to the implementation [here](https://github.com/goccy/go-json/blob/master/internal/runtime/type.go#L36-L100)
+
+## Decoder
+
+### Dispatch by typeptr from map to slice
+
+Like the encoder, the decoder also uses typeptr to call the dedicated process.
+
+### Faster termination character inspection using NUL character
+
+In order to decode, you have to traverse the input buffer character by character.
+At that time, if you check whether the buffer has reached the end, it will be very slow.
+
+`buf` : `[]byte` type variable. holds the string passed to the decoder
+`cursor` : `int64` type variable. holds the current read position
+
+```go
+buflen := len(buf)
+for ; cursor < buflen; cursor++ { // compare cursor and buflen at all times, it is so slow.
+    switch buf[cursor] {
+    case ' ', '\n', '\r', '\t':
+    }
+}
+```
+
+Therefore, by adding the `NUL` (`\000`) character to the end of the read buffer as shown below, it is possible to check the termination character at the same time as other characters.
+
+```go
+for {
+    switch buf[cursor] {
+    case ' ', '\n', '\r', '\t':
+    case '\000':
+        return nil
+    }
+    cursor++
+}
+```
+
+### Use Boundary Check Elimination
+
+Even with the `NUL` character optimization, the Go compiler still does a boundary check every time, even though `buf[cursor]` does not cause out-of-range access.
+
+Therefore, `go-json` eliminates boundary check by fetching characters for hotspot by pointer operation. For example, the following code.
+
+```go
+func char(ptr unsafe.Pointer, offset int64) byte {
+	return *(*byte)(unsafe.Pointer(uintptr(ptr) + uintptr(offset)))
+}
+
+p := (*sliceHeader)(unsafe.Pointer(&buf)).data
+for {
+    switch char(p, cursor) {
+    case ' ', '\n', '\r', '\t':
+    case '\000':
+        return nil
+    }
+    cursor++
+}
+```
+
+### Checking the existence of fields of struct using Bitmaps
+
+The profiling results showed that, when decoding a struct, the lookup process for fields was taking a long time.
+
+For example, consider decoding a string like `{"a":1,"b":2,"c":3}` into the following structure:
+
+```go
+type T struct {
+    A int `json:"a"`
+    B int `json:"b"`
+    C int `json:"c"`
+}
+```
+
+At this time, it was found that it takes a lot of time to acquire the decoding process corresponding to the field from the field name as shown below during the decoding process.
+
+```go
+fieldName := decodeKey(buf, cursor) // "a" or "b" or "c"
+decoder, exists := fieldToDecoderMap[fieldName] // so slow
+if exists {
+    decoder(buf, cursor)
+} else {
+    skipValue(buf, cursor)
+}
+```
+
+To improve this process, `json-iterator/go` is optimized so that it can be branched by switch-case when the number of fields in the structure is 10 or less (switch-case is faster than map). However, there is a risk of hash collision because the value hashed by the FNV algorithm is used for conditional branching. Also, `gojay` processes this part at high speed by having the library users write the `switch-case` themselves.
+
+
+`go-json` considers and implements a new approach that is different from these. I call this **bitmap field optimization**.
+
+The range of values per character can be represented by `[256]byte`. Also, if the number of fields in the structure is 8 or less, the `int8` type can represent the state of each field.
+In other words, it has the following structure.
+
+- Base ( 8bit ): `00000000`
+- Key "a": `00000001` ( assign key "a" to the first bit )
+- Key "b": `00000010` ( assign key "b" to the second bit )
+- Key "c": `00000100` ( assign key "c" to the third bit )
+
+Bitmap structure is the following
+
+```
+        | key index(0) |
+------------------------
+ 0      | 00000000     |
+ 1      | 00000000     |
+~~      |              |
+97 (a)  | 00000001     |
+98 (b)  | 00000010     |
+99 (c)  | 00000100     |
+~~      |              |
+255     | 00000000     |
+```
+
+You can think of this as a Bitmap with a height of `256` and a width of the maximum string length in the field name.
+In other words, it can be represented by the following type .
+
+```go
+[maxFieldKeyLength][256]int8
+```
+
+When decoding a field character, check whether the corresponding character exists by referring to the pre-built bitmap like the following.
+
+```go
+var curBit int8 = math.MaxInt8 // 11111111
+
+c := char(buf, cursor)
+bit := bitmap[keyIdx][c]
+curBit &= bit
+if curBit == 0 {
+    // not found field
+}
+```
+
+If `curBit` is not `0` until the end of the field string, then the string
+may have hit one of the fields.
+But the possibility is that if the decoded string is shorter than the field string, you will get a false hit.
+
+- input: `{"a":1}`
+```go
+type T struct {
+    X int `json:"abc"`
+}
+```
+※ Since `a` is shorter than `abc`, it can decode to the end of the field character without `curBit` being 0.
+
+Rest assured. In this case, it doesn't matter because you can tell if you hit by comparing the string length of `a` with the string length of `abc`.
+
+Finally, calculate the position of the bit where `1` is set and get the corresponding value, and you're done.
+
+Using this technique, field lookups are possible with only bitwise operations and access to slices.
+
+`go-json` uses a similar technique for structures with 9 to 16 fields. In that case, the bitmap is constructed as a `[maxKeyLen][256]int16` type.
+
+Currently, this optimization is not performed when the maximum length of the field name is long (specifically, 64 bytes or more) in addition to the limitation of the number of fields from the viewpoint of saving memory usage.
+
+### Others
+
+I have done a lot of other optimizations. I will find time to write about them. If you have any questions about what's written here or other optimizations, please visit the `#go-json` channel on `gophers.slack.com` .
+
+## Reference
+
+Regarding the story of go-json, there are the following articles in Japanese only.
+
+- https://speakerdeck.com/goccy/zui-su-falsejsonraiburariwoqiu-mete
+- https://engineering.mercari.com/blog/entry/1599563768-081104c850/
+
+# Looking for Sponsors
+
+I'm looking for sponsors for this library. This library is being developed as a personal project in my spare time. If you want a quick response or problem resolution when using this library in your project, please register as a [sponsor](https://github.com/sponsors/goccy). I will cooperate as much as possible. Of course, this library is released under the MIT license, so you can use it free of charge.
+
+# License
+
+MIT
diff --git a/vendor/github.com/goccy/go-json/color.go b/vendor/github.com/goccy/go-json/color.go
new file mode 100644
index 00000000..e80b22b4
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/color.go
@@ -0,0 +1,68 @@
+package json
+
+import (
+	"fmt"
+
+	"github.com/goccy/go-json/internal/encoder"
+)
+
+type (
+	ColorFormat = encoder.ColorFormat
+	ColorScheme = encoder.ColorScheme
+)
+
+const escape = "\x1b"
+
+type colorAttr int
+
+//nolint:deadcode,varcheck
+const (
+	fgBlackColor colorAttr = iota + 30
+	fgRedColor
+	fgGreenColor
+	fgYellowColor
+	fgBlueColor
+	fgMagentaColor
+	fgCyanColor
+	fgWhiteColor
+)
+
+//nolint:deadcode,varcheck
+const (
+	fgHiBlackColor colorAttr = iota + 90
+	fgHiRedColor
+	fgHiGreenColor
+	fgHiYellowColor
+	fgHiBlueColor
+	fgHiMagentaColor
+	fgHiCyanColor
+	fgHiWhiteColor
+)
+
+func createColorFormat(attr colorAttr) ColorFormat {
+	return ColorFormat{
+		Header: wrapColor(attr),
+		Footer: resetColor(),
+	}
+}
+
+func wrapColor(attr colorAttr) string {
+	return fmt.Sprintf("%s[%dm", escape, attr)
+}
+
+func resetColor() string {
+	return wrapColor(colorAttr(0))
+}
+
+var (
+	DefaultColorScheme = &ColorScheme{
+		Int:       createColorFormat(fgHiMagentaColor),
+		Uint:      createColorFormat(fgHiMagentaColor),
+		Float:     createColorFormat(fgHiMagentaColor),
+		Bool:      createColorFormat(fgHiYellowColor),
+		String:    createColorFormat(fgHiGreenColor),
+		Binary:    createColorFormat(fgHiRedColor),
+		ObjectKey: createColorFormat(fgHiCyanColor),
+		Null:      createColorFormat(fgBlueColor),
+	}
+)
diff --git a/vendor/github.com/goccy/go-json/decode.go b/vendor/github.com/goccy/go-json/decode.go
new file mode 100644
index 00000000..74c6ac3b
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/decode.go
@@ -0,0 +1,263 @@
+package json
+
+import (
+	"context"
+	"fmt"
+	"io"
+	"reflect"
+	"unsafe"
+
+	"github.com/goccy/go-json/internal/decoder"
+	"github.com/goccy/go-json/internal/errors"
+	"github.com/goccy/go-json/internal/runtime"
+)
+
+type Decoder struct {
+	s *decoder.Stream
+}
+
+const (
+	nul = '\000'
+)
+
+type emptyInterface struct {
+	typ *runtime.Type
+	ptr unsafe.Pointer
+}
+
+func unmarshal(data []byte, v interface{}, optFuncs ...DecodeOptionFunc) error {
+	src := make([]byte, len(data)+1) // append nul byte to the end
+	copy(src, data)
+
+	header := (*emptyInterface)(unsafe.Pointer(&v))
+
+	if err := validateType(header.typ, uintptr(header.ptr)); err != nil {
+		return err
+	}
+	dec, err := decoder.CompileToGetDecoder(header.typ)
+	if err != nil {
+		return err
+	}
+	ctx := decoder.TakeRuntimeContext()
+	ctx.Buf = src
+	ctx.Option.Flags = 0
+	for _, optFunc := range optFuncs {
+		optFunc(ctx.Option)
+	}
+	cursor, err := dec.Decode(ctx, 0, 0, header.ptr)
+	if err != nil {
+		decoder.ReleaseRuntimeContext(ctx)
+		return err
+	}
+	decoder.ReleaseRuntimeContext(ctx)
+	return validateEndBuf(src, cursor)
+}
+
+func unmarshalContext(ctx context.Context, data []byte, v interface{}, optFuncs ...DecodeOptionFunc) error {
+	src := make([]byte, len(data)+1) // append nul byte to the end
+	copy(src, data)
+
+	header := (*emptyInterface)(unsafe.Pointer(&v))
+
+	if err := validateType(header.typ, uintptr(header.ptr)); err != nil {
+		return err
+	}
+	dec, err := decoder.CompileToGetDecoder(header.typ)
+	if err != nil {
+		return err
+	}
+	rctx := decoder.TakeRuntimeContext()
+	rctx.Buf = src
+	rctx.Option.Flags = 0
+	rctx.Option.Flags |= decoder.ContextOption
+	rctx.Option.Context = ctx
+	for _, optFunc := range optFuncs {
+		optFunc(rctx.Option)
+	}
+	cursor, err := dec.Decode(rctx, 0, 0, header.ptr)
+	if err != nil {
+		decoder.ReleaseRuntimeContext(rctx)
+		return err
+	}
+	decoder.ReleaseRuntimeContext(rctx)
+	return validateEndBuf(src, cursor)
+}
+
+var (
+	pathDecoder = decoder.NewPathDecoder()
+)
+
+func extractFromPath(path *Path, data []byte, optFuncs ...DecodeOptionFunc) ([][]byte, error) {
+	if path.path.RootSelectorOnly {
+		return [][]byte{data}, nil
+	}
+	src := make([]byte, len(data)+1) // append nul byte to the end
+	copy(src, data)
+
+	ctx := decoder.TakeRuntimeContext()
+	ctx.Buf = src
+	ctx.Option.Flags = 0
+	ctx.Option.Flags |= decoder.PathOption
+	ctx.Option.Path = path.path
+	for _, optFunc := range optFuncs {
+		optFunc(ctx.Option)
+	}
+	paths, cursor, err := pathDecoder.DecodePath(ctx, 0, 0)
+	if err != nil {
+		decoder.ReleaseRuntimeContext(ctx)
+		return nil, err
+	}
+	decoder.ReleaseRuntimeContext(ctx)
+	if err := validateEndBuf(src, cursor); err != nil {
+		return nil, err
+	}
+	return paths, nil
+}
+
+func unmarshalNoEscape(data []byte, v interface{}, optFuncs ...DecodeOptionFunc) error {
+	src := make([]byte, len(data)+1) // append nul byte to the end
+	copy(src, data)
+
+	header := (*emptyInterface)(unsafe.Pointer(&v))
+
+	if err := validateType(header.typ, uintptr(header.ptr)); err != nil {
+		return err
+	}
+	dec, err := decoder.CompileToGetDecoder(header.typ)
+	if err != nil {
+		return err
+	}
+
+	ctx := decoder.TakeRuntimeContext()
+	ctx.Buf = src
+	ctx.Option.Flags = 0
+	for _, optFunc := range optFuncs {
+		optFunc(ctx.Option)
+	}
+	cursor, err := dec.Decode(ctx, 0, 0, noescape(header.ptr))
+	if err != nil {
+		decoder.ReleaseRuntimeContext(ctx)
+		return err
+	}
+	decoder.ReleaseRuntimeContext(ctx)
+	return validateEndBuf(src, cursor)
+}
+
+func validateEndBuf(src []byte, cursor int64) error { // returns nil only if nothing but whitespace lies between cursor and the nul byte
+	for {
+		switch src[cursor] {
+		case ' ', '\t', '\n', '\r':
+			cursor++ // skip trailing whitespace
+			continue
+		case nul: // nul is declared as '\000' in this file; callers allocate src one byte longer than the input
+			return nil
+		}
+		return errors.ErrSyntax(
+			fmt.Sprintf("invalid character '%c' after top-level value", src[cursor]),
+			cursor+1, // the offset handed to ErrSyntax is one past the offending byte's index
+		)
+	}
+}
+
+//nolint:staticcheck
+//go:nosplit
+func noescape(p unsafe.Pointer) unsafe.Pointer {
+	x := uintptr(p) // round-trip through uintptr; the pointer value itself is unchanged
+	return unsafe.Pointer(x ^ 0) // x ^ 0 == x; NOTE(review): presumably written this way to hide p from escape analysis, as the name suggests - confirm
+}
+
+func validateType(typ *runtime.Type, p uintptr) error {
+	if typ == nil || typ.Kind() != reflect.Ptr || p == 0 {
+		return &InvalidUnmarshalError{Type: runtime.RType2Type(typ)}
+	}
+	return nil
+}
+
+// NewDecoder returns a new decoder that reads from r.
+//
+// The decoder introduces its own buffering and may
+// read data from r beyond the JSON values requested.
+func NewDecoder(r io.Reader) *Decoder {
+	s := decoder.NewStream(r)
+	return &Decoder{
+		s: s,
+	}
+}
+
+// Buffered returns a reader of the data remaining in the Decoder's
+// buffer. The reader is valid until the next call to Decode.
+func (d *Decoder) Buffered() io.Reader {
+	return d.s.Buffered()
+}
+
+// Decode reads the next JSON-encoded value from its
+// input and stores it in the value pointed to by v.
+//
+// See the documentation for Unmarshal for details about
+// the conversion of JSON into a Go value.
+func (d *Decoder) Decode(v interface{}) error {
+	return d.DecodeWithOption(v)
+}
+
+// DecodeContext reads the next JSON-encoded value from its
+// input and stores it in the value pointed to by v with context.Context.
+func (d *Decoder) DecodeContext(ctx context.Context, v interface{}) error {
+	d.s.Option.Flags |= decoder.ContextOption
+	d.s.Option.Context = ctx
+	return d.DecodeWithOption(v)
+}
+
+func (d *Decoder) DecodeWithOption(v interface{}, optFuncs ...DecodeOptionFunc) error { // like Decode (which calls this with no options), but applies optFuncs to d.s.Option first
+	header := (*emptyInterface)(unsafe.Pointer(&v)) // view v as its (typ, ptr) pair
+	typ := header.typ
+	ptr := uintptr(header.ptr)
+	typeptr := uintptr(unsafe.Pointer(typ))
+	// noescape trick for header.typ ( reflect.*rtype ): the type pointer is rebuilt from its uintptr form
+	copiedType := *(**runtime.Type)(unsafe.Pointer(&typeptr))
+
+	if err := validateType(copiedType, ptr); err != nil { // v must be a non-nil pointer
+		return err
+	}
+
+	dec, err := decoder.CompileToGetDecoder(typ)
+	if err != nil {
+		return err
+	}
+	if err := d.s.PrepareForDecode(); err != nil {
+		return err
+	}
+	s := d.s
+	for _, optFunc := range optFuncs {
+		optFunc(s.Option) // options are applied to the decoder's own stream options
+	}
+	if err := dec.DecodeStream(s, 0, header.ptr); err != nil {
+		return err
+	}
+	s.Reset() // only reached on success; the error returns above skip the reset
+	return nil
+}
+
+func (d *Decoder) More() bool {
+	return d.s.More()
+}
+
+func (d *Decoder) Token() (Token, error) {
+	return d.s.Token()
+}
+
+// DisallowUnknownFields causes the Decoder to return an error when the destination
+// is a struct and the input contains object keys which do not match any
+// non-ignored, exported fields in the destination.
+func (d *Decoder) DisallowUnknownFields() {
+	d.s.DisallowUnknownFields = true
+}
+
+func (d *Decoder) InputOffset() int64 {
+	return d.s.TotalOffset()
+}
+
+// UseNumber causes the Decoder to unmarshal a number into an interface{} as a
+// Number instead of as a float64.
+func (d *Decoder) UseNumber() {
+	d.s.UseNumber = true
+}
diff --git a/vendor/github.com/goccy/go-json/docker-compose.yml b/vendor/github.com/goccy/go-json/docker-compose.yml
new file mode 100644
index 00000000..db40c79a
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/docker-compose.yml
@@ -0,0 +1,13 @@
+version: '2'
+services:
+  go-json:
+    image: golang:1.18
+    volumes:
+      - '.:/go/src/go-json'
+    deploy:
+      resources:
+        limits:
+          memory: 620M
+    working_dir: /go/src/go-json
+    command: |
+      sh -c "go test -c . && ls go-json.test"
diff --git a/vendor/github.com/goccy/go-json/encode.go b/vendor/github.com/goccy/go-json/encode.go
new file mode 100644
index 00000000..c5173825
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/encode.go
@@ -0,0 +1,326 @@
+package json
+
+import (
+	"context"
+	"io"
+	"os"
+	"unsafe"
+
+	"github.com/goccy/go-json/internal/encoder"
+	"github.com/goccy/go-json/internal/encoder/vm"
+	"github.com/goccy/go-json/internal/encoder/vm_color"
+	"github.com/goccy/go-json/internal/encoder/vm_color_indent"
+	"github.com/goccy/go-json/internal/encoder/vm_indent"
+)
+
+// An Encoder writes JSON values to an output stream.
+type Encoder struct {
+	w                 io.Writer
+	enabledIndent     bool
+	enabledHTMLEscape bool
+	prefix            string
+	indentStr         string
+}
+
+// NewEncoder returns a new encoder that writes to w.
+func NewEncoder(w io.Writer) *Encoder {
+	return &Encoder{w: w, enabledHTMLEscape: true}
+}
+
+// Encode writes the JSON encoding of v to the stream, followed by a newline character.
+//
+// See the documentation for Marshal for details about the conversion of Go values to JSON.
+func (e *Encoder) Encode(v interface{}) error {
+	return e.EncodeWithOption(v)
+}
+
+// EncodeWithOption calls Encode with the given EncodeOptionFuncs applied to the encode options.
+func (e *Encoder) EncodeWithOption(v interface{}, optFuncs ...EncodeOptionFunc) error {
+	ctx := encoder.TakeRuntimeContext()
+	ctx.Option.Flag = 0 // clear every option flag before any is set
+
+	err := e.encodeWithOption(ctx, v, optFuncs...)
+
+	encoder.ReleaseRuntimeContext(ctx) // released on both the success and the error path
+	return err
+}
+
+// EncodeContext calls Encode with ctx stored in the encode options and the given EncodeOptionFuncs applied.
+func (e *Encoder) EncodeContext(ctx context.Context, v interface{}, optFuncs ...EncodeOptionFunc) error {
+	rctx := encoder.TakeRuntimeContext()
+	rctx.Option.Flag = 0 // clear every option flag before any is set
+	rctx.Option.Flag |= encoder.ContextOption
+	rctx.Option.Context = ctx
+
+	err := e.encodeWithOption(rctx, v, optFuncs...) //nolint: contextcheck
+
+	encoder.ReleaseRuntimeContext(rctx) // released on both the success and the error path
+	return err
+}
+
+func (e *Encoder) encodeWithOption(ctx *encoder.RuntimeContext, v interface{}, optFuncs ...EncodeOptionFunc) error {
+	if e.enabledHTMLEscape {
+		ctx.Option.Flag |= encoder.HTMLEscapeOption
+	}
+	ctx.Option.Flag |= encoder.NormalizeUTF8Option
+	ctx.Option.DebugOut = os.Stdout
+	for _, optFunc := range optFuncs {
+		optFunc(ctx.Option)
+	}
+	var (
+		buf []byte
+		err error
+	)
+	if e.enabledIndent {
+		buf, err = encodeIndent(ctx, v, e.prefix, e.indentStr)
+	} else {
+		buf, err = encode(ctx, v)
+	}
+	if err != nil {
+		return err
+	}
+	if e.enabledIndent {
+		buf = buf[:len(buf)-2]
+	} else {
+		buf = buf[:len(buf)-1]
+	}
+	buf = append(buf, '\n')
+	if _, err := e.w.Write(buf); err != nil {
+		return err
+	}
+	return nil
+}
+
+// SetEscapeHTML specifies whether problematic HTML characters should be escaped inside JSON quoted strings.
+// The default behavior is to escape &, <, and > to \u0026, \u003c, and \u003e to avoid certain safety problems that can arise when embedding JSON in HTML.
+//
+// In non-HTML settings where the escaping interferes with the readability of the output, SetEscapeHTML(false) disables this behavior.
+func (e *Encoder) SetEscapeHTML(on bool) {
+	e.enabledHTMLEscape = on
+}
+
+// SetIndent instructs the encoder to format each subsequent encoded value as if indented by the package-level function Indent(dst, src, prefix, indent).
+// Calling SetIndent("", "") disables indentation.
+func (e *Encoder) SetIndent(prefix, indent string) {
+	if prefix == "" && indent == "" {
+		e.enabledIndent = false
+		return
+	}
+	e.prefix = prefix
+	e.indentStr = indent
+	e.enabledIndent = true
+}
+
+func marshalContext(ctx context.Context, v interface{}, optFuncs ...EncodeOptionFunc) ([]byte, error) {
+	rctx := encoder.TakeRuntimeContext()
+	rctx.Option.Flag = 0 // immediately overwritten by the assignment on the next line
+	rctx.Option.Flag = encoder.HTMLEscapeOption | encoder.NormalizeUTF8Option | encoder.ContextOption
+	rctx.Option.Context = ctx
+	for _, optFunc := range optFuncs {
+		optFunc(rctx.Option)
+	}
+
+	buf, err := encode(rctx, v) //nolint: contextcheck
+	if err != nil {
+		encoder.ReleaseRuntimeContext(rctx)
+		return nil, err
+	}
+
+	// Reslice buf first so that the call to `runtime.makeslicecopy` is avoided.
+	// With `make([]byte, len(buf)-1)` followed by `copy(copied, buf)`,
+	// the dst and src buffer sizes would be different; in that case the
+	// compiler uses `runtime.makeslicecopy`, which is slow.
+	buf = buf[:len(buf)-1] // drop the final byte of the encoder output
+	copied := make([]byte, len(buf))
+	copy(copied, buf) // copy out: encode stores buf in the context, which is released below
+
+	encoder.ReleaseRuntimeContext(rctx)
+	return copied, nil
+}
+
+func marshal(v interface{}, optFuncs ...EncodeOptionFunc) ([]byte, error) {
+	ctx := encoder.TakeRuntimeContext()
+
+	ctx.Option.Flag = 0 // clear every option flag before the defaults are set
+	ctx.Option.Flag |= (encoder.HTMLEscapeOption | encoder.NormalizeUTF8Option)
+	for _, optFunc := range optFuncs {
+		optFunc(ctx.Option)
+	}
+
+	buf, err := encode(ctx, v)
+	if err != nil {
+		encoder.ReleaseRuntimeContext(ctx)
+		return nil, err
+	}
+
+	// Reslice buf first so that the call to `runtime.makeslicecopy` is avoided.
+	// With `make([]byte, len(buf)-1)` followed by `copy(copied, buf)`,
+	// the dst and src buffer sizes would be different; in that case the
+	// compiler uses `runtime.makeslicecopy`, which is slow.
+	buf = buf[:len(buf)-1] // drop the final byte of the encoder output
+	copied := make([]byte, len(buf))
+	copy(copied, buf) // copy out: encode stores buf in the context, which is released below
+
+	encoder.ReleaseRuntimeContext(ctx)
+	return copied, nil
+}
+
+func marshalNoEscape(v interface{}) ([]byte, error) {
+	ctx := encoder.TakeRuntimeContext()
+
+	ctx.Option.Flag = 0 // clear every option flag before the defaults are set
+	ctx.Option.Flag |= (encoder.HTMLEscapeOption | encoder.NormalizeUTF8Option)
+
+	buf, err := encodeNoEscape(ctx, v)
+	if err != nil {
+		encoder.ReleaseRuntimeContext(ctx)
+		return nil, err
+	}
+
+	// Reslice buf first so that the call to `runtime.makeslicecopy` is avoided.
+	// With `make([]byte, len(buf)-1)` followed by `copy(copied, buf)`,
+	// the dst and src buffer sizes would be different; in that case the
+	// compiler uses `runtime.makeslicecopy`, which is slow.
+	buf = buf[:len(buf)-1] // drop the final byte of the encoder output
+	copied := make([]byte, len(buf))
+	copy(copied, buf) // copy out: encodeNoEscape stores buf in the context, which is released below
+
+	encoder.ReleaseRuntimeContext(ctx)
+	return copied, nil
+}
+
+func marshalIndent(v interface{}, prefix, indent string, optFuncs ...EncodeOptionFunc) ([]byte, error) {
+	ctx := encoder.TakeRuntimeContext()
+
+	ctx.Option.Flag = 0
+	ctx.Option.Flag |= (encoder.HTMLEscapeOption | encoder.NormalizeUTF8Option | encoder.IndentOption)
+	for _, optFunc := range optFuncs {
+		optFunc(ctx.Option)
+	}
+
+	buf, err := encodeIndent(ctx, v, prefix, indent)
+	if err != nil {
+		encoder.ReleaseRuntimeContext(ctx)
+		return nil, err
+	}
+
+	buf = buf[:len(buf)-2]
+	copied := make([]byte, len(buf))
+	copy(copied, buf)
+
+	encoder.ReleaseRuntimeContext(ctx)
+	return copied, nil
+}
+
+func encode(ctx *encoder.RuntimeContext, v interface{}) ([]byte, error) {
+	b := ctx.Buf[:0]
+	if v == nil {
+		b = encoder.AppendNull(ctx, b)
+		b = encoder.AppendComma(ctx, b)
+		return b, nil
+	}
+	header := (*emptyInterface)(unsafe.Pointer(&v))
+	typ := header.typ
+
+	typeptr := uintptr(unsafe.Pointer(typ))
+	codeSet, err := encoder.CompileToGetCodeSet(ctx, typeptr)
+	if err != nil {
+		return nil, err
+	}
+
+	p := uintptr(header.ptr)
+	ctx.Init(p, codeSet.CodeLength)
+	ctx.KeepRefs = append(ctx.KeepRefs, header.ptr)
+
+	buf, err := encodeRunCode(ctx, b, codeSet)
+	if err != nil {
+		return nil, err
+	}
+	ctx.Buf = buf
+	return buf, nil
+}
+
+func encodeNoEscape(ctx *encoder.RuntimeContext, v interface{}) ([]byte, error) {
+	b := ctx.Buf[:0]
+	if v == nil {
+		b = encoder.AppendNull(ctx, b)
+		b = encoder.AppendComma(ctx, b)
+		return b, nil
+	}
+	header := (*emptyInterface)(unsafe.Pointer(&v))
+	typ := header.typ
+
+	typeptr := uintptr(unsafe.Pointer(typ))
+	codeSet, err := encoder.CompileToGetCodeSet(ctx, typeptr)
+	if err != nil {
+		return nil, err
+	}
+
+	p := uintptr(header.ptr)
+	ctx.Init(p, codeSet.CodeLength)
+	buf, err := encodeRunCode(ctx, b, codeSet)
+	if err != nil {
+		return nil, err
+	}
+
+	ctx.Buf = buf
+	return buf, nil
+}
+
+func encodeIndent(ctx *encoder.RuntimeContext, v interface{}, prefix, indent string) ([]byte, error) {
+	b := ctx.Buf[:0]
+	if v == nil {
+		b = encoder.AppendNull(ctx, b)
+		b = encoder.AppendCommaIndent(ctx, b)
+		return b, nil
+	}
+	header := (*emptyInterface)(unsafe.Pointer(&v))
+	typ := header.typ
+
+	typeptr := uintptr(unsafe.Pointer(typ))
+	codeSet, err := encoder.CompileToGetCodeSet(ctx, typeptr)
+	if err != nil {
+		return nil, err
+	}
+
+	p := uintptr(header.ptr)
+	ctx.Init(p, codeSet.CodeLength)
+	buf, err := encodeRunIndentCode(ctx, b, codeSet, prefix, indent)
+
+	ctx.KeepRefs = append(ctx.KeepRefs, header.ptr)
+
+	if err != nil {
+		return nil, err
+	}
+
+	ctx.Buf = buf
+	return buf, nil
+}
+
+func encodeRunCode(ctx *encoder.RuntimeContext, b []byte, codeSet *encoder.OpcodeSet) ([]byte, error) {
+	if (ctx.Option.Flag & encoder.DebugOption) != 0 {
+		if (ctx.Option.Flag & encoder.ColorizeOption) != 0 {
+			return vm_color.DebugRun(ctx, b, codeSet)
+		}
+		return vm.DebugRun(ctx, b, codeSet)
+	}
+	if (ctx.Option.Flag & encoder.ColorizeOption) != 0 {
+		return vm_color.Run(ctx, b, codeSet)
+	}
+	return vm.Run(ctx, b, codeSet)
+}
+
+func encodeRunIndentCode(ctx *encoder.RuntimeContext, b []byte, codeSet *encoder.OpcodeSet, prefix, indent string) ([]byte, error) {
+	ctx.Prefix = []byte(prefix)
+	ctx.IndentStr = []byte(indent)
+	if (ctx.Option.Flag & encoder.DebugOption) != 0 {
+		if (ctx.Option.Flag & encoder.ColorizeOption) != 0 {
+			return vm_color_indent.DebugRun(ctx, b, codeSet)
+		}
+		return vm_indent.DebugRun(ctx, b, codeSet)
+	}
+	if (ctx.Option.Flag & encoder.ColorizeOption) != 0 {
+		return vm_color_indent.Run(ctx, b, codeSet)
+	}
+	return vm_indent.Run(ctx, b, codeSet)
+}
diff --git a/vendor/github.com/goccy/go-json/error.go b/vendor/github.com/goccy/go-json/error.go
new file mode 100644
index 00000000..5b2dcee5
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/error.go
@@ -0,0 +1,41 @@
+package json
+
+import (
+	"github.com/goccy/go-json/internal/errors"
+)
+
+// Before Go 1.2, an InvalidUTF8Error was returned by Marshal when
+// attempting to encode a string value with invalid UTF-8 sequences.
+// As of Go 1.2, Marshal instead coerces the string to valid UTF-8 by
+// replacing invalid bytes with the Unicode replacement rune U+FFFD.
+//
+// Deprecated: No longer used; kept for compatibility.
+type InvalidUTF8Error = errors.InvalidUTF8Error
+
+// An InvalidUnmarshalError describes an invalid argument passed to Unmarshal.
+// (The argument to Unmarshal must be a non-nil pointer.)
+type InvalidUnmarshalError = errors.InvalidUnmarshalError
+
+// A MarshalerError represents an error from calling a MarshalJSON or MarshalText method.
+type MarshalerError = errors.MarshalerError
+
+// A SyntaxError is a description of a JSON syntax error.
+type SyntaxError = errors.SyntaxError
+
+// An UnmarshalFieldError describes a JSON object key that
+// led to an unexported (and therefore unwritable) struct field.
+//
+// Deprecated: No longer used; kept for compatibility.
+type UnmarshalFieldError = errors.UnmarshalFieldError
+
+// An UnmarshalTypeError describes a JSON value that was
+// not appropriate for a value of a specific Go type.
+type UnmarshalTypeError = errors.UnmarshalTypeError
+
+// An UnsupportedTypeError is returned by Marshal when attempting
+// to encode an unsupported value type.
+type UnsupportedTypeError = errors.UnsupportedTypeError
+
+type UnsupportedValueError = errors.UnsupportedValueError
+
+type PathError = errors.PathError
diff --git a/vendor/github.com/goccy/go-json/internal/decoder/anonymous_field.go b/vendor/github.com/goccy/go-json/internal/decoder/anonymous_field.go
new file mode 100644
index 00000000..b6876cf0
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/internal/decoder/anonymous_field.go
@@ -0,0 +1,41 @@
+package decoder
+
+import (
+	"unsafe"
+
+	"github.com/goccy/go-json/internal/runtime"
+)
+
+type anonymousFieldDecoder struct {
+	structType *runtime.Type
+	offset     uintptr
+	dec        Decoder
+}
+
+func newAnonymousFieldDecoder(structType *runtime.Type, offset uintptr, dec Decoder) *anonymousFieldDecoder {
+	return &anonymousFieldDecoder{
+		structType: structType,
+		offset:     offset,
+		dec:        dec,
+	}
+}
+
+func (d *anonymousFieldDecoder) DecodeStream(s *Stream, depth int64, p unsafe.Pointer) error {
+	if *(*unsafe.Pointer)(p) == nil {
+		*(*unsafe.Pointer)(p) = unsafe_New(d.structType)
+	}
+	p = *(*unsafe.Pointer)(p)
+	return d.dec.DecodeStream(s, depth, unsafe.Pointer(uintptr(p)+d.offset))
+}
+
+func (d *anonymousFieldDecoder) Decode(ctx *RuntimeContext, cursor, depth int64, p unsafe.Pointer) (int64, error) {
+	if *(*unsafe.Pointer)(p) == nil {
+		*(*unsafe.Pointer)(p) = unsafe_New(d.structType)
+	}
+	p = *(*unsafe.Pointer)(p)
+	return d.dec.Decode(ctx, cursor, depth, unsafe.Pointer(uintptr(p)+d.offset))
+}
+
+func (d *anonymousFieldDecoder) DecodePath(ctx *RuntimeContext, cursor, depth int64) ([][]byte, int64, error) {
+	return d.dec.DecodePath(ctx, cursor, depth)
+}
diff --git a/vendor/github.com/goccy/go-json/internal/decoder/array.go b/vendor/github.com/goccy/go-json/internal/decoder/array.go
new file mode 100644
index 00000000..4b23ed43
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/internal/decoder/array.go
@@ -0,0 +1,176 @@
+package decoder
+
+import (
+	"fmt"
+	"unsafe"
+
+	"github.com/goccy/go-json/internal/errors"
+	"github.com/goccy/go-json/internal/runtime"
+)
+
+type arrayDecoder struct {
+	elemType     *runtime.Type
+	size         uintptr
+	valueDecoder Decoder
+	alen         int
+	structName   string
+	fieldName    string
+	zeroValue    unsafe.Pointer
+}
+
+func newArrayDecoder(dec Decoder, elemType *runtime.Type, alen int, structName, fieldName string) *arrayDecoder {
+	// workaround to avoid checkptr errors. cannot use `*(*unsafe.Pointer)(unsafe_New(elemType))` directly.
+	zeroValuePtr := unsafe_New(elemType)
+	zeroValue := **(**unsafe.Pointer)(unsafe.Pointer(&zeroValuePtr))
+	return &arrayDecoder{
+		valueDecoder: dec,
+		elemType:     elemType,
+		size:         elemType.Size(),
+		alen:         alen,
+		structName:   structName,
+		fieldName:    fieldName,
+		zeroValue:    zeroValue,
+	}
+}
+
+func (d *arrayDecoder) DecodeStream(s *Stream, depth int64, p unsafe.Pointer) error {
+	depth++
+	if depth > maxDecodeNestingDepth {
+		return errors.ErrExceededMaxDepth(s.char(), s.cursor)
+	}
+
+	for {
+		switch s.char() {
+		case ' ', '\n', '\t', '\r':
+		case 'n':
+			if err := nullBytes(s); err != nil {
+				return err
+			}
+			return nil
+		case '[':
+			idx := 0
+			s.cursor++
+			if s.skipWhiteSpace() == ']' {
+				for idx < d.alen {
+					*(*unsafe.Pointer)(unsafe.Pointer(uintptr(p) + uintptr(idx)*d.size)) = d.zeroValue
+					idx++
+				}
+				s.cursor++
+				return nil
+			}
+			for {
+				if idx < d.alen {
+					if err := d.valueDecoder.DecodeStream(s, depth, unsafe.Pointer(uintptr(p)+uintptr(idx)*d.size)); err != nil {
+						return err
+					}
+				} else {
+					if err := s.skipValue(depth); err != nil {
+						return err
+					}
+				}
+				idx++
+				switch s.skipWhiteSpace() {
+				case ']':
+					for idx < d.alen {
+						*(*unsafe.Pointer)(unsafe.Pointer(uintptr(p) + uintptr(idx)*d.size)) = d.zeroValue
+						idx++
+					}
+					s.cursor++
+					return nil
+				case ',':
+					s.cursor++
+					continue
+				case nul:
+					if s.read() {
+						s.cursor++
+						continue
+					}
+					goto ERROR
+				default:
+					goto ERROR
+				}
+			}
+		case nul:
+			if s.read() {
+				continue
+			}
+			goto ERROR
+		default:
+			goto ERROR
+		}
+		s.cursor++
+	}
+ERROR:
+	return errors.ErrUnexpectedEndOfJSON("array", s.totalOffset())
+}
+
+func (d *arrayDecoder) Decode(ctx *RuntimeContext, cursor, depth int64, p unsafe.Pointer) (int64, error) {
+	buf := ctx.Buf
+	depth++
+	if depth > maxDecodeNestingDepth {
+		return 0, errors.ErrExceededMaxDepth(buf[cursor], cursor)
+	}
+
+	for {
+		switch buf[cursor] {
+		case ' ', '\n', '\t', '\r':
+			cursor++
+			continue
+		case 'n':
+			if err := validateNull(buf, cursor); err != nil {
+				return 0, err
+			}
+			cursor += 4
+			return cursor, nil
+		case '[':
+			idx := 0
+			cursor++
+			cursor = skipWhiteSpace(buf, cursor)
+			if buf[cursor] == ']' {
+				for idx < d.alen {
+					*(*unsafe.Pointer)(unsafe.Pointer(uintptr(p) + uintptr(idx)*d.size)) = d.zeroValue
+					idx++
+				}
+				cursor++
+				return cursor, nil
+			}
+			for {
+				if idx < d.alen {
+					c, err := d.valueDecoder.Decode(ctx, cursor, depth, unsafe.Pointer(uintptr(p)+uintptr(idx)*d.size))
+					if err != nil {
+						return 0, err
+					}
+					cursor = c
+				} else {
+					c, err := skipValue(buf, cursor, depth)
+					if err != nil {
+						return 0, err
+					}
+					cursor = c
+				}
+				idx++
+				cursor = skipWhiteSpace(buf, cursor)
+				switch buf[cursor] {
+				case ']':
+					for idx < d.alen {
+						*(*unsafe.Pointer)(unsafe.Pointer(uintptr(p) + uintptr(idx)*d.size)) = d.zeroValue
+						idx++
+					}
+					cursor++
+					return cursor, nil
+				case ',':
+					cursor++
+					continue
+				default:
+					return 0, errors.ErrInvalidCharacter(buf[cursor], "array", cursor)
+				}
+			}
+		default:
+			return 0, errors.ErrUnexpectedEndOfJSON("array", cursor)
+		}
+	}
+}
+
+func (d *arrayDecoder) DecodePath(ctx *RuntimeContext, cursor, depth int64) ([][]byte, int64, error) {
+	return nil, 0, fmt.Errorf("json: array decoder does not support decode path")
+}
diff --git a/vendor/github.com/goccy/go-json/internal/decoder/assign.go b/vendor/github.com/goccy/go-json/internal/decoder/assign.go
new file mode 100644
index 00000000..c53e6ad9
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/internal/decoder/assign.go
@@ -0,0 +1,438 @@
+package decoder
+
+import (
+	"fmt"
+	"reflect"
+	"strconv"
+)
+
+// nilValue is the zero reflect.Value; the cast helpers in this file return it
+// whenever they report an error.
+var nilValue = reflect.ValueOf(nil)
+
+// AssignValue casts src to the type dst points to and stores the result through dst; dst must be a pointer.
+func AssignValue(src, dst reflect.Value) error {
+	if dst.Kind() != reflect.Ptr {
+		return fmt.Errorf("invalid dst type. required pointer type: %s", dst.Type()) // %s prints the offending type itself
+	}
+	casted, err := castValue(dst.Elem().Type(), src)
+	if err == nil {
+		dst.Elem().Set(casted)
+	}
+	return err
+}
+
+func castValue(t reflect.Type, v reflect.Value) (reflect.Value, error) {
+	switch t.Kind() { // dispatch on the kind of the destination type t
+	case reflect.Int:
+		vv, err := castInt(v)
+		if err != nil {
+			return nilValue, err
+		}
+		return reflect.ValueOf(int(vv.Int())), nil // narrowed with a plain Go conversion; the result has the unnamed type int
+	case reflect.Int8:
+		vv, err := castInt(v)
+		if err != nil {
+			return nilValue, err
+		}
+		return reflect.ValueOf(int8(vv.Int())), nil
+	case reflect.Int16:
+		vv, err := castInt(v)
+		if err != nil {
+			return nilValue, err
+		}
+		return reflect.ValueOf(int16(vv.Int())), nil
+	case reflect.Int32:
+		vv, err := castInt(v)
+		if err != nil {
+			return nilValue, err
+		}
+		return reflect.ValueOf(int32(vv.Int())), nil
+	case reflect.Int64:
+		return castInt(v) // NOTE(review): castInt returns integer inputs unchanged, so this may be narrower than int64 — confirm callers can Set it
+	case reflect.Uint:
+		vv, err := castUint(v)
+		if err != nil {
+			return nilValue, err
+		}
+		return reflect.ValueOf(uint(vv.Uint())), nil
+	case reflect.Uint8:
+		vv, err := castUint(v)
+		if err != nil {
+			return nilValue, err
+		}
+		return reflect.ValueOf(uint8(vv.Uint())), nil
+	case reflect.Uint16:
+		vv, err := castUint(v)
+		if err != nil {
+			return nilValue, err
+		}
+		return reflect.ValueOf(uint16(vv.Uint())), nil
+	case reflect.Uint32:
+		vv, err := castUint(v)
+		if err != nil {
+			return nilValue, err
+		}
+		return reflect.ValueOf(uint32(vv.Uint())), nil
+	case reflect.Uint64:
+		return castUint(v) // NOTE(review): same pass-through caveat as the Int64 case
+	case reflect.Uintptr:
+		vv, err := castUint(v)
+		if err != nil {
+			return nilValue, err
+		}
+		return reflect.ValueOf(uintptr(vv.Uint())), nil
+	case reflect.String:
+		return castString(v) // string inputs are returned by castString unchanged, keeping their original type
+	case reflect.Bool:
+		return castBool(v) // bool inputs are returned by castBool unchanged, keeping their original type
+	case reflect.Float32:
+		vv, err := castFloat(v)
+		if err != nil {
+			return nilValue, err
+		}
+		return reflect.ValueOf(float32(vv.Float())), nil
+	case reflect.Float64:
+		return castFloat(v) // NOTE(review): castFloat returns float32 inputs unchanged — same caveat as the Int64 case
+	case reflect.Array:
+		return castArray(t, v)
+	case reflect.Slice:
+		return castSlice(t, v)
+	case reflect.Map:
+		return castMap(t, v)
+	case reflect.Struct:
+		return castStruct(t, v)
+	}
+	return v, nil // every other destination kind: v is handed back without any casting
+}
+
+// castInt converts v to an integer reflect.Value. Integer kinds are returned
+// unchanged, every other supported kind is converted to int64:
+//   - unsigned integers and floats use a plain Go conversion,
+//   - strings are parsed as base-10 integers,
+//   - booleans become 1 or 0,
+//   - arrays and slices are represented by their first element,
+//   - interfaces and pointers are unwrapped.
+//
+// Any other kind, including the invalid value behind a nil interface or pointer, is an error.
+func castInt(v reflect.Value) (reflect.Value, error) {
+	switch v.Kind() {
+	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
+		return v, nil
+	case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
+		return reflect.ValueOf(int64(v.Uint())), nil
+	case reflect.String:
+		i64, err := strconv.ParseInt(v.String(), 10, 64)
+		if err != nil {
+			return nilValue, err
+		}
+		return reflect.ValueOf(i64), nil
+	case reflect.Bool:
+		if v.Bool() {
+			return reflect.ValueOf(int64(1)), nil
+		}
+		return reflect.ValueOf(int64(0)), nil
+	case reflect.Float32, reflect.Float64:
+		return reflect.ValueOf(int64(v.Float())), nil
+	case reflect.Array, reflect.Slice:
+		if v.Len() > 0 {
+			return castInt(v.Index(0))
+		}
+		return nilValue, fmt.Errorf("failed to cast to int64 from empty %s", v.Kind())
+	case reflect.Interface:
+		return castInt(reflect.ValueOf(v.Interface()))
+	case reflect.Ptr:
+		return castInt(v.Elem())
+	}
+	return nilValue, fmt.Errorf("failed to cast to int64 from %s", v.Kind())
+}
+
+// castUint converts v to an unsigned integer reflect.Value. Unsigned kinds (uintptr
+// included) are returned unchanged, every other supported kind is converted to uint64:
+//   - signed integers and floats use a plain Go conversion,
+//   - strings are parsed as base-10 unsigned integers,
+//   - booleans become 1 or 0,
+//   - arrays and slices are represented by their first element,
+//   - interfaces and pointers are unwrapped.
+//
+// Any other kind, including the invalid value behind a nil interface or pointer, is an error.
+func castUint(v reflect.Value) (reflect.Value, error) {
+	switch v.Kind() {
+	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
+		return reflect.ValueOf(uint64(v.Int())), nil
+	case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
+		return v, nil
+	case reflect.String:
+		u64, err := strconv.ParseUint(v.String(), 10, 64)
+		if err != nil {
+			return nilValue, err
+		}
+		return reflect.ValueOf(u64), nil
+	case reflect.Bool:
+		if v.Bool() {
+			return reflect.ValueOf(uint64(1)), nil
+		}
+		return reflect.ValueOf(uint64(0)), nil
+	case reflect.Float32, reflect.Float64:
+		return reflect.ValueOf(uint64(v.Float())), nil
+	case reflect.Array, reflect.Slice:
+		if v.Len() > 0 {
+			return castUint(v.Index(0))
+		}
+		return nilValue, fmt.Errorf("failed to cast to uint64 from empty %s", v.Kind())
+	case reflect.Interface:
+		return castUint(reflect.ValueOf(v.Interface()))
+	case reflect.Ptr:
+		return castUint(v.Elem())
+	}
+	return nilValue, fmt.Errorf("failed to cast to uint64 from %s", v.Kind())
+}
+
+// castString converts v to a string reflect.Value. Strings are returned
+// unchanged, every other supported kind is formatted:
+//   - integers, unsigned integers and floats are printed with fmt.Sprint,
+//   - booleans become "true" or "false",
+//   - arrays and slices are represented by their first element,
+//   - interfaces and pointers are unwrapped.
+//
+// Any other kind, including the invalid value behind a nil interface or
+// pointer, is an error.
+func castString(v reflect.Value) (reflect.Value, error) {
+	switch v.Kind() {
+	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
+		return reflect.ValueOf(fmt.Sprint(v.Int())), nil
+	case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
+		return reflect.ValueOf(fmt.Sprint(v.Uint())), nil
+	case reflect.String:
+		return v, nil
+	case reflect.Bool:
+		if v.Bool() {
+			return reflect.ValueOf("true"), nil
+		}
+		return reflect.ValueOf("false"), nil
+	case reflect.Float32, reflect.Float64:
+		return reflect.ValueOf(fmt.Sprint(v.Float())), nil
+	case reflect.Array, reflect.Slice:
+		if v.Len() > 0 {
+			return castString(v.Index(0))
+		}
+		return nilValue, fmt.Errorf("failed to cast to string from empty %s", v.Kind())
+	case reflect.Interface:
+		return castString(reflect.ValueOf(v.Interface()))
+	case reflect.Ptr:
+		return castString(v.Elem())
+	}
+	return nilValue, fmt.Errorf("failed to cast to string from %s", v.Kind())
+}
+
+// castBool converts v to a bool reflect.Value. Booleans are returned
+// unchanged, every other supported kind is interpreted as follows:
+//   - integers, unsigned integers and floats must be exactly 0 or 1,
+//   - strings are parsed with strconv.ParseBool,
+//   - arrays and slices are represented by their first element,
+//   - interfaces and pointers are unwrapped.
+//
+// Any other kind or value, including the invalid value behind a nil interface
+// or pointer, is an error.
+func castBool(v reflect.Value) (reflect.Value, error) {
+	switch v.Kind() {
+	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
+		switch v.Int() {
+		case 0:
+			return reflect.ValueOf(false), nil
+		case 1:
+			return reflect.ValueOf(true), nil
+		}
+		return nilValue, fmt.Errorf("failed to cast to bool from %d", v.Int())
+	case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
+		switch v.Uint() {
+		case 0:
+			return reflect.ValueOf(false), nil
+		case 1:
+			return reflect.ValueOf(true), nil
+		}
+		return nilValue, fmt.Errorf("failed to cast to bool from %d", v.Uint())
+	case reflect.String:
+		b, err := strconv.ParseBool(v.String())
+		if err != nil {
+			return nilValue, err
+		}
+		return reflect.ValueOf(b), nil
+	case reflect.Bool:
+		return v, nil
+	case reflect.Float32, reflect.Float64:
+		switch v.Float() {
+		case 0:
+			return reflect.ValueOf(false), nil
+		case 1:
+			return reflect.ValueOf(true), nil
+		}
+		return nilValue, fmt.Errorf("failed to cast to bool from %f", v.Float())
+	case reflect.Array, reflect.Slice:
+		if v.Len() > 0 {
+			return castBool(v.Index(0))
+		}
+		return nilValue, fmt.Errorf("failed to cast to bool from empty %s", v.Kind())
+	case reflect.Interface:
+		return castBool(reflect.ValueOf(v.Interface()))
+	case reflect.Ptr:
+		return castBool(v.Elem())
+	}
+	return nilValue, fmt.Errorf("failed to cast to bool from %s", v.Kind())
+}
+
+// castFloat converts v to a floating-point reflect.Value. Float kinds are
+// returned unchanged, every other supported kind is converted to float64:
+//   - integers and unsigned integers use a plain Go conversion,
+//   - strings are parsed with strconv.ParseFloat,
+//   - booleans become 1 or 0,
+//   - arrays and slices are represented by their first element,
+//   - interfaces and pointers are unwrapped.
+//
+// Any other kind, including the invalid value behind a nil interface or pointer, is an error.
+func castFloat(v reflect.Value) (reflect.Value, error) {
+	switch v.Kind() {
+	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
+		return reflect.ValueOf(float64(v.Int())), nil
+	case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
+		return reflect.ValueOf(float64(v.Uint())), nil
+	case reflect.String:
+		f64, err := strconv.ParseFloat(v.String(), 64)
+		if err != nil {
+			return nilValue, err
+		}
+		return reflect.ValueOf(f64), nil
+	case reflect.Bool:
+		if v.Bool() {
+			return reflect.ValueOf(float64(1)), nil
+		}
+		return reflect.ValueOf(float64(0)), nil
+	case reflect.Float32, reflect.Float64:
+		return v, nil
+	case reflect.Array, reflect.Slice:
+		if v.Len() > 0 {
+			return castFloat(v.Index(0))
+		}
+		return nilValue, fmt.Errorf("failed to cast to float64 from empty %s", v.Kind())
+	case reflect.Interface:
+		return castFloat(reflect.ValueOf(v.Interface()))
+	case reflect.Ptr:
+		return castFloat(v.Elem())
+	}
+	return nilValue, fmt.Errorf("failed to cast to float64 from %s", v.Kind())
+}
+
+// castArray casts v, an array or slice (possibly wrapped in an interface), to the array type t.
+func castArray(t reflect.Type, v reflect.Value) (reflect.Value, error) {
+	switch v.Kind() {
+	case reflect.Interface:
+		return castArray(t, reflect.ValueOf(v.Interface()))
+	case reflect.Slice, reflect.Array:
+		if v.Type().AssignableTo(t) { // only a value the caller can Set as is may skip the element-wise copy
+			return v, nil
+		}
+		if t.Len() != v.Len() {
+			return nilValue, fmt.Errorf("failed to cast [%d]array from slice of %d length", t.Len(), v.Len())
+		}
+		ret := reflect.New(t).Elem()
+		for i := 0; i < v.Len(); i++ {
+			vv, err := castValue(t.Elem(), v.Index(i))
+			if err != nil {
+				return nilValue, err
+			}
+			ret.Index(i).Set(vv)
+		}
+		return ret, nil
+	}
+	return nilValue, fmt.Errorf("failed to cast to array from %s", v.Kind()) // also reached for the invalid value behind a nil interface
+}
+
+// castSlice casts v, an array or slice (possibly wrapped in an interface), to the slice type t.
+func castSlice(t reflect.Type, v reflect.Value) (reflect.Value, error) {
+	switch v.Kind() {
+	case reflect.Interface:
+		return castSlice(t, reflect.ValueOf(v.Interface()))
+	case reflect.Slice, reflect.Array:
+		if v.Type().AssignableTo(t) { // an array with a matching element type is not assignable and gets copied below
+			return v, nil
+		}
+		ret := reflect.MakeSlice(t, v.Len(), v.Len())
+		for i := 0; i < v.Len(); i++ {
+			vv, err := castValue(t.Elem(), v.Index(i))
+			if err != nil {
+				return nilValue, err
+			}
+			ret.Index(i).Set(vv)
+		}
+		return ret, nil
+	}
+	return nilValue, fmt.Errorf("failed to cast to slice from %s", v.Kind()) // also reached for the invalid value behind a nil interface
+}
+
+// castMap builds a new map of type t from v, casting every key and value with castValue.
+func castMap(t reflect.Type, v reflect.Value) (reflect.Value, error) {
+	switch v.Kind() {
+	case reflect.Map:
+		ret := reflect.MakeMap(t) // allocated only on the path that fills it
+		for iter := v.MapRange(); iter.Next(); {
+			key, err := castValue(t.Key(), iter.Key())
+			if err != nil {
+				return nilValue, err
+			}
+			value, err := castValue(t.Elem(), iter.Value())
+			if err != nil {
+				return nilValue, err
+			}
+			ret.SetMapIndex(key, value)
+		}
+		return ret, nil
+	case reflect.Interface:
+		return castMap(t, reflect.ValueOf(v.Interface()))
+	case reflect.Slice:
+		if v.Len() > 0 {
+			return castMap(t, v.Index(0)) // a slice is represented by its first element
+		}
+		return nilValue, fmt.Errorf("failed to cast to map from empty slice")
+	}
+	return nilValue, fmt.Errorf("failed to cast to map from %s", v.Kind()) // also reached for the invalid value behind a nil interface
+}
+
+// castStruct builds a new value of struct type t from v. Map entries are matched to fields by name;
+// struct fields are copied by name when the destination field exists, is settable and assignable.
+func castStruct(t reflect.Type, v reflect.Value) (reflect.Value, error) {
+	ret := reflect.New(t).Elem()
+	switch v.Kind() {
+	case reflect.Map:
+		for iter := v.MapRange(); iter.Next(); {
+			k, err := castString(iter.Key())
+			if err != nil {
+				return nilValue, err
+			}
+			if field, ok := t.FieldByName(k.String()); ok { // keys without a matching field are ignored
+				value, err := castValue(field.Type, iter.Value())
+				if err != nil {
+					return nilValue, err
+				}
+				ret.FieldByIndex(field.Index).Set(value)
+			}
+		}
+		return ret, nil
+	case reflect.Struct:
+		for i := 0; i < v.NumField(); i++ {
+			dst := ret.FieldByName(v.Type().Field(i).Name) // invalid (and thus not settable) when t has no such field
+			if dst.CanSet() && v.Field(i).Type().AssignableTo(dst.Type()) {
+				dst.Set(v.Field(i))
+			}
+		}
+		return ret, nil
+	case reflect.Interface:
+		return castStruct(t, reflect.ValueOf(v.Interface()))
+	case reflect.Slice:
+		if v.Len() > 0 {
+			return castStruct(t, v.Index(0))
+		}
+		return nilValue, fmt.Errorf("failed to cast to struct from empty slice")
+	default:
+		return nilValue, fmt.Errorf("failed to cast to struct from %s", v.Kind())
+	}
+}
diff --git a/vendor/github.com/goccy/go-json/internal/decoder/bool.go b/vendor/github.com/goccy/go-json/internal/decoder/bool.go
new file mode 100644
index 00000000..ba6cf5bc
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/internal/decoder/bool.go
@@ -0,0 +1,83 @@
+package decoder
+
+import (
+	"fmt"
+	"unsafe"
+
+	"github.com/goccy/go-json/internal/errors"
+)
+
+type boolDecoder struct { // decoder for Go bool destinations (see its Decode and DecodeStream methods)
+	structName string // NOTE(review): presumably the enclosing struct's name, kept for diagnostics — not read in this file
+	fieldName  string // NOTE(review): presumably the decoded field's name, kept for diagnostics — not read in this file
+}
+
+func newBoolDecoder(structName, fieldName string) *boolDecoder {
+	return &boolDecoder{structName: structName, fieldName: fieldName} // only records the two names; the decoder has no other state
+}
+
+func (d *boolDecoder) DecodeStream(s *Stream, depth int64, p unsafe.Pointer) error {
+	c := s.skipWhiteSpace() // byte returned by the stream after skipping whitespace
+	for {
+		switch c {
+		case 't':
+			if err := trueBytes(s); err != nil {
+				return err
+			}
+			**(**bool)(unsafe.Pointer(&p)) = true // p is treated as *bool: store true through it
+			return nil
+		case 'f':
+			if err := falseBytes(s); err != nil {
+				return err
+			}
+			**(**bool)(unsafe.Pointer(&p)) = false // p is treated as *bool: store false through it
+			return nil
+		case 'n':
+			if err := nullBytes(s); err != nil {
+				return err
+			}
+			return nil // the null branch never writes to p
+		case nul:
+			if s.read() { // NOTE(review): nul looks like an end-of-buffer sentinel — if read() gets more input, retry with the new byte
+				c = s.char()
+				continue
+			}
+			goto ERROR
+		}
+		break // any other byte cannot start a bool: fall through to the error below
+	}
+ERROR:
+	return errors.ErrUnexpectedEndOfJSON("bool", s.totalOffset())
+}
+
+func (d *boolDecoder) Decode(ctx *RuntimeContext, cursor, depth int64, p unsafe.Pointer) (int64, error) {
+	buf := ctx.Buf
+	cursor = skipWhiteSpace(buf, cursor)
+	switch buf[cursor] { // the first byte selects which literal is validated
+	case 't':
+		if err := validateTrue(buf, cursor); err != nil {
+			return 0, err
+		}
+		cursor += 4 // len("true")
+		**(**bool)(unsafe.Pointer(&p)) = true // p is treated as *bool
+		return cursor, nil
+	case 'f':
+		if err := validateFalse(buf, cursor); err != nil {
+			return 0, err
+		}
+		cursor += 5 // len("false")
+		**(**bool)(unsafe.Pointer(&p)) = false
+		return cursor, nil
+	case 'n':
+		if err := validateNull(buf, cursor); err != nil {
+			return 0, err
+		}
+		cursor += 4 // len("null"); the null branch never writes to p
+		return cursor, nil
+	}
+	return 0, errors.ErrUnexpectedEndOfJSON("bool", cursor) // any other leading byte
+}
+
+func (d *boolDecoder) DecodePath(ctx *RuntimeContext, cursor, depth int64) ([][]byte, int64, error) {
+	return nil, 0, fmt.Errorf("json: bool decoder does not support decode path") // always fails; none of the arguments is examined
+}
diff --git a/vendor/github.com/goccy/go-json/internal/decoder/bytes.go b/vendor/github.com/goccy/go-json/internal/decoder/bytes.go
new file mode 100644
index 00000000..939bf432
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/internal/decoder/bytes.go
@@ -0,0 +1,118 @@
+package decoder
+
+import (
+	"encoding/base64"
+	"fmt"
+	"unsafe"
+
+	"github.com/goccy/go-json/internal/errors"
+	"github.com/goccy/go-json/internal/runtime"
+)
+
+type bytesDecoder struct {
+	typ           *runtime.Type  // type reported in the UnmarshalTypeError raised when sliceDecoder is nil
+	sliceDecoder  Decoder        // handles input that starts with '[' (a JSON array)
+	stringDecoder *stringDecoder // handles every other input; the bytes it yields are base64-decoded afterwards
+	structName    string
+	fieldName     string
+}
+
+func byteUnmarshalerSliceDecoder(typ *runtime.Type, structName string, fieldName string) Decoder {
+	var unmarshalDecoder Decoder // element decoder used when the value arrives as a JSON array
+	switch {
+	case runtime.PtrTo(typ).Implements(unmarshalJSONType):
+		unmarshalDecoder = newUnmarshalJSONDecoder(runtime.PtrTo(typ), structName, fieldName)
+	case runtime.PtrTo(typ).Implements(unmarshalTextType):
+		unmarshalDecoder = newUnmarshalTextDecoder(runtime.PtrTo(typ), structName, fieldName)
+	default:
+		unmarshalDecoder, _ = compileUint8(typ, structName, fieldName) // error ignored: compileUint8 always returns a nil error
+	}
+	return newSliceDecoder(unmarshalDecoder, typ, 1, structName, fieldName) // third argument 1: compileSlice passes elem.Size() in this position
+}
+
+func newBytesDecoder(typ *runtime.Type, structName string, fieldName string) *bytesDecoder {
+	return &bytesDecoder{ // both strategies are prepared up front: a slice decoder for '[' input, a string decoder for the rest
+		typ:           typ,
+		sliceDecoder:  byteUnmarshalerSliceDecoder(typ, structName, fieldName),
+		stringDecoder: newStringDecoder(structName, fieldName),
+		structName:    structName,
+		fieldName:     fieldName,
+	}
+}
+
+func (d *bytesDecoder) DecodeStream(s *Stream, depth int64, p unsafe.Pointer) error {
+	bytes, err := d.decodeStreamBinary(s, depth, p)
+	if err != nil {
+		return err
+	}
+	if bytes == nil { // nothing to base64-decode, e.g. the array path of decodeStreamBinary has already written to p
+		s.reset()
+		return nil
+	}
+	decodedLen := base64.StdEncoding.DecodedLen(len(bytes)) // upper bound on the decoded size
+	buf := make([]byte, decodedLen)
+	n, err := base64.StdEncoding.Decode(buf, bytes)
+	if err != nil {
+		return err
+	}
+	*(*[]byte)(p) = buf[:n] // p is treated as *[]byte; keep only the bytes actually decoded
+	s.reset()
+	return nil
+}
+
+func (d *bytesDecoder) Decode(ctx *RuntimeContext, cursor, depth int64, p unsafe.Pointer) (int64, error) {
+	bytes, c, err := d.decodeBinary(ctx, cursor, depth, p)
+	if err != nil {
+		return 0, err
+	}
+	if bytes == nil { // nothing to base64-decode, e.g. the array path of decodeBinary has already written to p
+		return c, nil
+	}
+	cursor = c
+	decodedLen := base64.StdEncoding.DecodedLen(len(bytes)) // upper bound on the decoded size
+	b := make([]byte, decodedLen)
+	n, err := base64.StdEncoding.Decode(b, bytes)
+	if err != nil {
+		return 0, err
+	}
+	*(*[]byte)(p) = b[:n] // p is treated as *[]byte; keep only the bytes actually decoded
+	return cursor, nil
+}
+
+func (d *bytesDecoder) DecodePath(ctx *RuntimeContext, cursor, depth int64) ([][]byte, int64, error) {
+	return nil, 0, fmt.Errorf("json: []byte decoder does not support decode path") // always fails; none of the arguments is examined
+}
+
+func (d *bytesDecoder) decodeStreamBinary(s *Stream, depth int64, p unsafe.Pointer) ([]byte, error) {
+	c := s.skipWhiteSpace()
+	if c == '[' { // JSON array form: the slice decoder writes the elements straight into p
+		if d.sliceDecoder == nil {
+			return nil, &errors.UnmarshalTypeError{
+				Type:   runtime.RType2Type(d.typ),
+				Offset: s.totalOffset(),
+			}
+		}
+		err := d.sliceDecoder.DecodeStream(s, depth, p)
+		return nil, err // nil bytes: the caller has nothing left to base64-decode
+	}
+	return d.stringDecoder.decodeStreamByte(s) // otherwise hand the bytes back for base64 decoding by the caller
+}
+
+func (d *bytesDecoder) decodeBinary(ctx *RuntimeContext, cursor, depth int64, p unsafe.Pointer) ([]byte, int64, error) {
+	buf := ctx.Buf
+	cursor = skipWhiteSpace(buf, cursor)
+	if buf[cursor] == '[' { // JSON array form: the slice decoder writes the elements straight into p
+		if d.sliceDecoder == nil {
+			return nil, 0, &errors.UnmarshalTypeError{
+				Type:   runtime.RType2Type(d.typ),
+				Offset: cursor,
+			}
+		}
+		c, err := d.sliceDecoder.Decode(ctx, cursor, depth, p)
+		if err != nil {
+			return nil, 0, err
+		}
+		return nil, c, nil // nil bytes: the caller has nothing left to base64-decode
+	}
+	return d.stringDecoder.decodeByte(buf, cursor) // otherwise hand the bytes back for base64 decoding by the caller
+}
diff --git a/vendor/github.com/goccy/go-json/internal/decoder/compile.go b/vendor/github.com/goccy/go-json/internal/decoder/compile.go
new file mode 100644
index 00000000..fab64376
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/internal/decoder/compile.go
@@ -0,0 +1,487 @@
+package decoder
+
+import (
+	"encoding/json"
+	"fmt"
+	"reflect"
+	"strings"
+	"sync/atomic"
+	"unicode"
+	"unsafe"
+
+	"github.com/goccy/go-json/internal/runtime"
+)
+
+var (
+	jsonNumberType   = reflect.TypeOf(json.Number("")) // compared against in compileString to recognise json.Number
+	typeAddr         *runtime.TypeAddr
+	cachedDecoderMap unsafe.Pointer // map[uintptr]Decoder keyed by type address; accessed through loadDecoderMap/storeDecoder
+	cachedDecoder    []Decoder      // decoder table indexed by (type address - BaseTypeAddr) >> AddrShift
+)
+
+func init() {
+	typeAddr = runtime.AnalyzeTypeAddr()
+	if typeAddr == nil {
+		typeAddr = &runtime.TypeAddr{} // fall back to a zero-valued TypeAddr when the analysis returns nil
+	}
+	cachedDecoder = make([]Decoder, typeAddr.AddrRange>>typeAddr.AddrShift+1) // one slot per shifted address in the range
+}
+
+func loadDecoderMap() map[uintptr]Decoder {
+	p := atomic.LoadPointer(&cachedDecoderMap) // nil until storeDecoder has published a map
+	return *(*map[uintptr]Decoder)(unsafe.Pointer(&p)) // reinterpret the loaded pointer as the map storeDecoder stored
+}
+
+func storeDecoder(typ uintptr, dec Decoder, m map[uintptr]Decoder) {
+	newDecoderMap := make(map[uintptr]Decoder, len(m)+1) // m itself is never modified: a fresh map is built and published
+	newDecoderMap[typ] = dec
+
+	for k, v := range m { // copied after the insert above, so an entry m already holds for typ wins over dec
+		newDecoderMap[k] = v
+	}
+
+	atomic.StorePointer(&cachedDecoderMap, *(*unsafe.Pointer)(unsafe.Pointer(&newDecoderMap)))
+}
+
+func compileToGetDecoderSlowPath(typeptr uintptr, typ *runtime.Type) (Decoder, error) {
+	decoderMap := loadDecoderMap() // snapshot of the map-based cache
+	if dec, exists := decoderMap[typeptr]; exists {
+		return dec, nil
+	}
+
+	dec, err := compileHead(typ, map[uintptr]Decoder{})
+	if err != nil {
+		return nil, err
+	}
+	storeDecoder(typeptr, dec, decoderMap) // publishes a new map built from the snapshot plus this decoder
+	return dec, nil
+}
+
+func compileHead(typ *runtime.Type, structTypeToDecoder map[uintptr]Decoder) (Decoder, error) {
+	switch { // a custom unmarshaler implemented by *typ takes precedence
+	case implementsUnmarshalJSONType(runtime.PtrTo(typ)):
+		return newUnmarshalJSONDecoder(runtime.PtrTo(typ), "", ""), nil
+	case runtime.PtrTo(typ).Implements(unmarshalTextType):
+		return newUnmarshalTextDecoder(runtime.PtrTo(typ), "", ""), nil
+	}
+	return compile(typ.Elem(), "", "", structTypeToDecoder) // compiles the element type — presumably typ is always a pointer type here; verify against callers
+}
+
+func compile(typ *runtime.Type, structName, fieldName string, structTypeToDecoder map[uintptr]Decoder) (Decoder, error) {
+	switch { // a custom unmarshaler implemented by *typ takes precedence over the kind-based decoders below
+	case implementsUnmarshalJSONType(runtime.PtrTo(typ)):
+		return newUnmarshalJSONDecoder(runtime.PtrTo(typ), structName, fieldName), nil
+	case runtime.PtrTo(typ).Implements(unmarshalTextType):
+		return newUnmarshalTextDecoder(runtime.PtrTo(typ), structName, fieldName), nil
+	}
+
+	switch typ.Kind() {
+	case reflect.Ptr:
+		return compilePtr(typ, structName, fieldName, structTypeToDecoder)
+	case reflect.Struct:
+		return compileStruct(typ, structName, fieldName, structTypeToDecoder)
+	case reflect.Slice:
+		elem := typ.Elem()
+		if elem.Kind() == reflect.Uint8 { // slices with uint8 elements get the dedicated bytes decoder
+			return compileBytes(elem, structName, fieldName)
+		}
+		return compileSlice(typ, structName, fieldName, structTypeToDecoder)
+	case reflect.Array:
+		return compileArray(typ, structName, fieldName, structTypeToDecoder)
+	case reflect.Map:
+		return compileMap(typ, structName, fieldName, structTypeToDecoder)
+	case reflect.Interface:
+		return compileInterface(typ, structName, fieldName)
+	case reflect.Uintptr:
+		return compileUint(typ, structName, fieldName) // uintptr shares the uint decoder
+	case reflect.Int:
+		return compileInt(typ, structName, fieldName)
+	case reflect.Int8:
+		return compileInt8(typ, structName, fieldName)
+	case reflect.Int16:
+		return compileInt16(typ, structName, fieldName)
+	case reflect.Int32:
+		return compileInt32(typ, structName, fieldName)
+	case reflect.Int64:
+		return compileInt64(typ, structName, fieldName)
+	case reflect.Uint:
+		return compileUint(typ, structName, fieldName)
+	case reflect.Uint8:
+		return compileUint8(typ, structName, fieldName)
+	case reflect.Uint16:
+		return compileUint16(typ, structName, fieldName)
+	case reflect.Uint32:
+		return compileUint32(typ, structName, fieldName)
+	case reflect.Uint64:
+		return compileUint64(typ, structName, fieldName)
+	case reflect.String:
+		return compileString(typ, structName, fieldName)
+	case reflect.Bool:
+		return compileBool(structName, fieldName)
+	case reflect.Float32:
+		return compileFloat32(structName, fieldName)
+	case reflect.Float64:
+		return compileFloat64(structName, fieldName)
+	case reflect.Func:
+		return compileFunc(typ, structName, fieldName)
+	}
+	return newInvalidDecoder(typ, structName, fieldName), nil // every remaining kind gets the invalid decoder, with a nil error
+}
+
+func isStringTagSupportedType(typ *runtime.Type) bool {
+	switch { // types whose pointer implements a custom unmarshaler are excluded
+	case implementsUnmarshalJSONType(runtime.PtrTo(typ)):
+		return false
+	case runtime.PtrTo(typ).Implements(unmarshalTextType):
+		return false
+	}
+	switch typ.Kind() { // container and interface kinds are excluded as well
+	case reflect.Map:
+		return false
+	case reflect.Slice:
+		return false
+	case reflect.Array:
+		return false
+	case reflect.Struct:
+		return false
+	case reflect.Interface:
+		return false
+	}
+	return true // compileStruct wraps the field decoder in a wrapped string decoder only when this is true
+}
+
+func compileMapKey(typ *runtime.Type, structName, fieldName string, structTypeToDecoder map[uintptr]Decoder) (Decoder, error) {
+	if runtime.PtrTo(typ).Implements(unmarshalTextType) {
+		return newUnmarshalTextDecoder(runtime.PtrTo(typ), structName, fieldName), nil
+	}
+	if typ.Kind() == reflect.String { // string-kind keys always use the plain string decoder
+		return newStringDecoder(structName, fieldName), nil
+	}
+	dec, err := compile(typ, structName, fieldName, structTypeToDecoder)
+	if err != nil {
+		return nil, err
+	}
+	for { // peel pointer decoders until a decoder that settles the question is reached
+		switch t := dec.(type) {
+		case *stringDecoder, *interfaceDecoder:
+			return dec, nil
+		case *boolDecoder, *intDecoder, *uintDecoder, *numberDecoder:
+			return newWrappedStringDecoder(typ, dec, structName, fieldName), nil // NOTE(review): presumably because JSON object keys are always quoted — confirm
+		case *ptrDecoder:
+			dec = t.dec
+		default:
+			return newInvalidDecoder(typ, structName, fieldName), nil
+		}
+	}
+}
+
+func compilePtr(typ *runtime.Type, structName, fieldName string, structTypeToDecoder map[uintptr]Decoder) (Decoder, error) {
+	dec, err := compile(typ.Elem(), structName, fieldName, structTypeToDecoder) // decoder for the pointed-to type
+	if err != nil {
+		return nil, err
+	}
+	return newPtrDecoder(dec, typ.Elem(), structName, fieldName), nil
+}
+
+func compileInt(typ *runtime.Type, structName, fieldName string) (Decoder, error) {
+	return newIntDecoder(typ, structName, fieldName, func(p unsafe.Pointer, v int64) {
+		*(*int)(p) = int(v) // p is treated as *int; v is converted from int64
+	}), nil
+}
+
+func compileInt8(typ *runtime.Type, structName, fieldName string) (Decoder, error) {
+	return newIntDecoder(typ, structName, fieldName, func(p unsafe.Pointer, v int64) {
+		*(*int8)(p) = int8(v) // p is treated as *int8; v is converted from int64
+	}), nil
+}
+
+func compileInt16(typ *runtime.Type, structName, fieldName string) (Decoder, error) {
+	return newIntDecoder(typ, structName, fieldName, func(p unsafe.Pointer, v int64) {
+		*(*int16)(p) = int16(v) // p is treated as *int16; v is converted from int64
+	}), nil
+}
+
+func compileInt32(typ *runtime.Type, structName, fieldName string) (Decoder, error) {
+	return newIntDecoder(typ, structName, fieldName, func(p unsafe.Pointer, v int64) {
+		*(*int32)(p) = int32(v) // p is treated as *int32; v is converted from int64
+	}), nil
+}
+
+func compileInt64(typ *runtime.Type, structName, fieldName string) (Decoder, error) {
+	return newIntDecoder(typ, structName, fieldName, func(p unsafe.Pointer, v int64) {
+		*(*int64)(p) = v // p is treated as *int64; no conversion needed
+	}), nil
+}
+
+func compileUint(typ *runtime.Type, structName, fieldName string) (Decoder, error) {
+	return newUintDecoder(typ, structName, fieldName, func(p unsafe.Pointer, v uint64) {
+		*(*uint)(p) = uint(v) // p is treated as *uint; compile also routes uintptr destinations here
+	}), nil
+}
+
+func compileUint8(typ *runtime.Type, structName, fieldName string) (Decoder, error) {
+	return newUintDecoder(typ, structName, fieldName, func(p unsafe.Pointer, v uint64) {
+		*(*uint8)(p) = uint8(v) // p is treated as *uint8; v is converted from uint64
+	}), nil
+}
+
+func compileUint16(typ *runtime.Type, structName, fieldName string) (Decoder, error) {
+	return newUintDecoder(typ, structName, fieldName, func(p unsafe.Pointer, v uint64) {
+		*(*uint16)(p) = uint16(v) // p is treated as *uint16; v is converted from uint64
+	}), nil
+}
+
+func compileUint32(typ *runtime.Type, structName, fieldName string) (Decoder, error) {
+	return newUintDecoder(typ, structName, fieldName, func(p unsafe.Pointer, v uint64) {
+		*(*uint32)(p) = uint32(v) // p is treated as *uint32; v is converted from uint64
+	}), nil
+}
+
+func compileUint64(typ *runtime.Type, structName, fieldName string) (Decoder, error) {
+	return newUintDecoder(typ, structName, fieldName, func(p unsafe.Pointer, v uint64) {
+		*(*uint64)(p) = v // p is treated as *uint64; no conversion needed
+	}), nil
+}
+
+func compileFloat32(structName, fieldName string) (Decoder, error) {
+	return newFloatDecoder(structName, fieldName, func(p unsafe.Pointer, v float64) {
+		*(*float32)(p) = float32(v) // p is treated as *float32; v is converted from float64
+	}), nil
+}
+
+func compileFloat64(structName, fieldName string) (Decoder, error) {
+	return newFloatDecoder(structName, fieldName, func(p unsafe.Pointer, v float64) {
+		*(*float64)(p) = v // p is treated as *float64; no conversion needed
+	}), nil
+}
+
+func compileString(typ *runtime.Type, structName, fieldName string) (Decoder, error) {
+	if typ == runtime.Type2RType(jsonNumberType) { // json.Number has kind String but gets the number decoder
+		return newNumberDecoder(structName, fieldName, func(p unsafe.Pointer, v json.Number) {
+			*(*json.Number)(p) = v
+		}), nil
+	}
+	return newStringDecoder(structName, fieldName), nil
+}
+
+func compileBool(structName, fieldName string) (Decoder, error) {
+	return newBoolDecoder(structName, fieldName), nil // never fails; the error result matches the other compile helpers
+}
+
+func compileBytes(typ *runtime.Type, structName, fieldName string) (Decoder, error) {
+	return newBytesDecoder(typ, structName, fieldName), nil // compile passes the slice's element type as typ
+}
+
+func compileSlice(typ *runtime.Type, structName, fieldName string, structTypeToDecoder map[uintptr]Decoder) (Decoder, error) {
+	elem := typ.Elem()
+	decoder, err := compile(elem, structName, fieldName, structTypeToDecoder) // decoder for a single element
+	if err != nil {
+		return nil, err
+	}
+	return newSliceDecoder(decoder, elem, elem.Size(), structName, fieldName), nil
+}
+
+func compileArray(typ *runtime.Type, structName, fieldName string, structTypeToDecoder map[uintptr]Decoder) (Decoder, error) {
+	elem := typ.Elem()
+	decoder, err := compile(elem, structName, fieldName, structTypeToDecoder) // decoder for a single element
+	if err != nil {
+		return nil, err
+	}
+	return newArrayDecoder(decoder, elem, typ.Len(), structName, fieldName), nil // typ.Len() is the array's fixed length
+}
+
+func compileMap(typ *runtime.Type, structName, fieldName string, structTypeToDecoder map[uintptr]Decoder) (Decoder, error) {
+	keyDec, err := compileMapKey(typ.Key(), structName, fieldName, structTypeToDecoder) // keys go through compileMapKey, not compile
+	if err != nil {
+		return nil, err
+	}
+	valueDec, err := compile(typ.Elem(), structName, fieldName, structTypeToDecoder)
+	if err != nil {
+		return nil, err
+	}
+	return newMapDecoder(typ, typ.Key(), keyDec, typ.Elem(), valueDec, structName, fieldName), nil
+}
+
+func compileInterface(typ *runtime.Type, structName, fieldName string) (Decoder, error) {
+	return newInterfaceDecoder(typ, structName, fieldName), nil // never fails; the error result matches the other compile helpers
+}
+
+func compileFunc(typ *runtime.Type, strutName, fieldName string) (Decoder, error) {
+	return newFuncDecoder(typ, strutName, fieldName), nil // strutName carries what the sibling helpers call structName
+}
+
+func typeToStructTags(typ *runtime.Type) runtime.StructTags {
+	tags := runtime.StructTags{} // one entry per field that runtime.IsIgnoredStructField does not reject
+	fieldNum := typ.NumField()
+	for i := 0; i < fieldNum; i++ {
+		field := typ.Field(i)
+		if runtime.IsIgnoredStructField(field) {
+			continue
+		}
+		tags = append(tags, runtime.StructTagFromField(field))
+	}
+	return tags
+}
+
+func compileStruct(typ *runtime.Type, structName, fieldName string, structTypeToDecoder map[uintptr]Decoder) (Decoder, error) {
+	fieldNum := typ.NumField()
+	fieldMap := map[string]*structFieldSet{}
+	typeptr := uintptr(unsafe.Pointer(typ))
+	if dec, exists := structTypeToDecoder[typeptr]; exists { // this struct is still being compiled by a caller: reuse that decoder
+		return dec, nil
+	}
+	structDec := newStructDecoder(structName, fieldName, fieldMap)
+	structTypeToDecoder[typeptr] = structDec // registered before the fields are compiled so self-referential fields hit the check above
+	structName = typ.Name() // from here on structName names this struct rather than the enclosing one
+	tags := typeToStructTags(typ)
+	allFields := []*structFieldSet{}
+	for i := 0; i < fieldNum; i++ {
+		field := typ.Field(i)
+		if runtime.IsIgnoredStructField(field) {
+			continue
+		}
+		isUnexportedField := unicode.IsLower([]rune(field.Name)[0]) // judged by the first rune of the field name being lower case
+		tag := runtime.StructTagFromField(field)
+		dec, err := compile(runtime.Type2RType(field.Type), structName, field.Name, structTypeToDecoder)
+		if err != nil {
+			return nil, err
+		}
+		if field.Anonymous && !tag.IsTaggedKey { // embedded field without an explicit key
+			if stDec, ok := dec.(*structDecoder); ok {
+				if runtime.Type2RType(field.Type) == typ {
+					// recursive definition
+					continue
+				}
+				for k, v := range stDec.fieldMap {
+					if tags.ExistsKey(k) { // a key claimed by this struct's own tags is not taken from the embedded struct
+						continue
+					}
+					fieldSet := &structFieldSet{
+						dec:         v.dec,
+						offset:      field.Offset + v.offset,
+						isTaggedKey: v.isTaggedKey,
+						key:         k,
+						keyLen:      int64(len(k)),
+					}
+					allFields = append(allFields, fieldSet)
+				}
+			} else if pdec, ok := dec.(*ptrDecoder); ok {
+				contentDec := pdec.contentDecoder()
+				if pdec.typ == typ {
+					// recursive definition
+					continue
+				}
+				var fieldSetErr error
+				if isUnexportedField { // recorded on each field set built below rather than returned here
+					fieldSetErr = fmt.Errorf(
+						"json: cannot set embedded pointer to unexported struct: %v",
+						field.Type.Elem(),
+					)
+				}
+				if dec, ok := contentDec.(*structDecoder); ok {
+					for k, v := range dec.fieldMap {
+						if tags.ExistsKey(k) {
+							continue
+						}
+						fieldSet := &structFieldSet{
+							dec:         newAnonymousFieldDecoder(pdec.typ, v.offset, v.dec),
+							offset:      field.Offset,
+							isTaggedKey: v.isTaggedKey,
+							key:         k,
+							keyLen:      int64(len(k)),
+							err:         fieldSetErr,
+						}
+						allFields = append(allFields, fieldSet)
+					}
+				} else {
+					fieldSet := &structFieldSet{
+						dec:         pdec,
+						offset:      field.Offset,
+						isTaggedKey: tag.IsTaggedKey,
+						key:         field.Name,
+						keyLen:      int64(len(field.Name)),
+					}
+					allFields = append(allFields, fieldSet)
+				}
+			} else {
+				fieldSet := &structFieldSet{
+					dec:         dec,
+					offset:      field.Offset,
+					isTaggedKey: tag.IsTaggedKey,
+					key:         field.Name,
+					keyLen:      int64(len(field.Name)),
+				}
+				allFields = append(allFields, fieldSet)
+			}
+		} else {
+			if tag.IsString && isStringTagSupportedType(runtime.Type2RType(field.Type)) {
+				dec = newWrappedStringDecoder(runtime.Type2RType(field.Type), dec, structName, field.Name)
+			}
+			var key string
+			if tag.Key != "" { // an explicit tag key wins over the Go field name
+				key = tag.Key
+			} else {
+				key = field.Name
+			}
+			fieldSet := &structFieldSet{
+				dec:         dec,
+				offset:      field.Offset,
+				isTaggedKey: tag.IsTaggedKey,
+				key:         key,
+				keyLen:      int64(len(key)),
+			}
+			allFields = append(allFields, fieldSet)
+		}
+	}
+	for _, set := range filterDuplicatedFields(allFields) {
+		fieldMap[set.key] = set
+		lower := strings.ToLower(set.key)
+		if _, exists := fieldMap[lower]; !exists {
+			// first win: the lower-cased alias never replaces an entry that already exists
+			fieldMap[lower] = set
+		}
+	}
+	delete(structTypeToDecoder, typeptr) // compilation finished: drop the in-progress entry
+	structDec.tryOptimize()
+	return structDec, nil
+}
+
+func filterDuplicatedFields(allFields []*structFieldSet) []*structFieldSet {
+	fieldMap := map[string][]*structFieldSet{} // field sets grouped by key
+	for _, field := range allFields {
+		fieldMap[field.key] = append(fieldMap[field.key], field)
+	}
+	duplicatedFieldMap := map[string]struct{}{} // keys that filterFieldSets could not narrow down to exactly one field set
+	for k, sets := range fieldMap {
+		sets = filterFieldSets(sets)
+		if len(sets) != 1 {
+			duplicatedFieldMap[k] = struct{}{}
+		}
+	}
+
+	filtered := make([]*structFieldSet, 0, len(allFields))
+	for _, field := range allFields { // every field set sharing a rejected key is dropped; input order is preserved
+		if _, exists := duplicatedFieldMap[field.key]; exists {
+			continue
+		}
+		filtered = append(filtered, field)
+	}
+	return filtered
+}
+
+func filterFieldSets(sets []*structFieldSet) []*structFieldSet {
+	if len(sets) == 1 { // a single candidate is returned as is, tagged or not
+		return sets
+	}
+	filtered := make([]*structFieldSet, 0, len(sets))
+	for _, set := range sets { // otherwise only candidates with isTaggedKey set are kept
+		if set.isTaggedKey {
+			filtered = append(filtered, set)
+		}
+	}
+	return filtered
+}
+
+func implementsUnmarshalJSONType(typ *runtime.Type) bool {
+	return typ.Implements(unmarshalJSONType) || typ.Implements(unmarshalJSONContextType) // either of the two unmarshaler interfaces qualifies
+}
diff --git a/vendor/github.com/goccy/go-json/internal/decoder/compile_norace.go b/vendor/github.com/goccy/go-json/internal/decoder/compile_norace.go
new file mode 100644
index 00000000..eb7e2b13
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/internal/decoder/compile_norace.go
@@ -0,0 +1,29 @@
+//go:build !race
+// +build !race
+
+package decoder
+
+import (
+	"unsafe"
+
+	"github.com/goccy/go-json/internal/runtime"
+)
+
+// CompileToGetDecoder returns the decoder cached for typ, compiling and caching
+// one on a miss. Addresses above typeAddr.MaxTypeAddr go to the slow path.
+func CompileToGetDecoder(typ *runtime.Type) (Decoder, error) {
+	addr := uintptr(unsafe.Pointer(typ))
+	if addr > typeAddr.MaxTypeAddr {
+		return compileToGetDecoderSlowPath(addr, typ)
+	}
+	slot := (addr - typeAddr.BaseTypeAddr) >> typeAddr.AddrShift
+	if cached := cachedDecoder[slot]; cached != nil {
+		return cached, nil
+	}
+	compiled, err := compileHead(typ, map[uintptr]Decoder{})
+	if err != nil {
+		return nil, err
+	}
+	cachedDecoder[slot] = compiled
+	return compiled, nil
+}
diff --git a/vendor/github.com/goccy/go-json/internal/decoder/compile_race.go b/vendor/github.com/goccy/go-json/internal/decoder/compile_race.go
new file mode 100644
index 00000000..49cdda4a
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/internal/decoder/compile_race.go
@@ -0,0 +1,37 @@
+//go:build race
+// +build race
+
+package decoder
+
+import (
+	"sync"
+	"unsafe"
+
+	"github.com/goccy/go-json/internal/runtime"
+)
+
+var decMu sync.RWMutex // guards cachedDecoder accesses in CompileToGetDecoder (this file is built only with the race tag)
+
+// CompileToGetDecoder is the race-build variant: the same lookup/compile/store
+// sequence, with every cachedDecoder access performed under decMu.
+func CompileToGetDecoder(typ *runtime.Type) (Decoder, error) {
+	addr := uintptr(unsafe.Pointer(typ))
+	if addr > typeAddr.MaxTypeAddr {
+		return compileToGetDecoderSlowPath(addr, typ)
+	}
+	slot := (addr - typeAddr.BaseTypeAddr) >> typeAddr.AddrShift
+	decMu.RLock()
+	cached := cachedDecoder[slot]
+	decMu.RUnlock()
+	if cached != nil {
+		return cached, nil
+	}
+	compiled, err := compileHead(typ, map[uintptr]Decoder{})
+	if err != nil {
+		return nil, err
+	}
+	decMu.Lock()
+	cachedDecoder[slot] = compiled
+	decMu.Unlock()
+	return compiled, nil
+}
diff --git a/vendor/github.com/goccy/go-json/internal/decoder/context.go b/vendor/github.com/goccy/go-json/internal/decoder/context.go
new file mode 100644
index 00000000..cb2ffdaf
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/internal/decoder/context.go
@@ -0,0 +1,254 @@
+package decoder
+
+import (
+	"sync"
+	"unsafe"
+
+	"github.com/goccy/go-json/internal/errors"
+)
+
+type RuntimeContext struct { // RuntimeContext is the state passed to Decoder.Decode and Decoder.DecodePath.
+	Buf    []byte  // input bytes that the buffer-based decoders index by cursor
+	Option *Option // decode options; Option.Context is forwarded to context-aware unmarshalers
+}
+
+var (
+	runtimeContextPool = sync.Pool{ // backs TakeRuntimeContext / ReleaseRuntimeContext
+		New: func() interface{} {
+			return &RuntimeContext{
+				Option: &Option{}, // a freshly created context starts with a non-nil, zero-valued Option
+			}
+		},
+	}
+)
+
+func TakeRuntimeContext() *RuntimeContext { // TakeRuntimeContext fetches a context from runtimeContextPool (new or recycled).
+	return runtimeContextPool.Get().(*RuntimeContext)
+}
+
+func ReleaseRuntimeContext(ctx *RuntimeContext) { // ReleaseRuntimeContext returns ctx to the pool as-is; Buf and Option are not cleared here.
+	runtimeContextPool.Put(ctx)
+}
+
+var (
+	isWhiteSpace = [256]bool{} // byte-indexed lookup table; the true entries are set in init below
+)
+
+func init() { // marks space, LF, tab and CR as whitespace in isWhiteSpace
+	isWhiteSpace[' '] = true
+	isWhiteSpace['\n'] = true
+	isWhiteSpace['\t'] = true
+	isWhiteSpace['\r'] = true
+}
+
+func char(ptr unsafe.Pointer, offset int64) byte { // char reads the byte at ptr+offset via pointer arithmetic; no bounds check is performed.
+	return *(*byte)(unsafe.Pointer(uintptr(ptr) + uintptr(offset)))
+}
+
+// skipWhiteSpace returns the index of the first byte at or after cursor that is not flagged in isWhiteSpace.
+func skipWhiteSpace(buf []byte, cursor int64) int64 {
+	for ; isWhiteSpace[buf[cursor]]; cursor++ {
+	}
+	return cursor
+}
+
+func skipObject(buf []byte, cursor, depth int64) (int64, error) { // skipObject scans forward to the '}' that balances the object and returns the index just past it.
+	braceCount := 1 // starts at 1: skipValue calls this with cursor already past the opening '{'
+	for {
+		switch buf[cursor] {
+		case '{':
+			braceCount++
+			depth++
+			if depth > maxDecodeNestingDepth {
+				return 0, errors.ErrExceededMaxDepth(buf[cursor], cursor)
+			}
+		case '}':
+			depth--
+			braceCount--
+			if braceCount == 0 { // the brace balancing the initial '{' was found
+				return cursor + 1, nil
+			}
+		case '[':
+			depth++ // arrays count toward the nesting depth but not toward braceCount
+			if depth > maxDecodeNestingDepth {
+				return 0, errors.ErrExceededMaxDepth(buf[cursor], cursor)
+			}
+		case ']':
+			depth--
+		case '"':
+			for { // walk the string body so braces/brackets inside it are not counted
+				cursor++
+				switch buf[cursor] {
+				case '\\':
+					cursor++ // step over the escaped byte
+					if buf[cursor] == nul {
+						return 0, errors.ErrUnexpectedEndOfJSON("string of object", cursor)
+					}
+				case '"':
+					goto SWITCH_OUT // closing quote; the cursor++ after the label moves past it
+				case nul: // a nul byte is treated as end of input
+					return 0, errors.ErrUnexpectedEndOfJSON("string of object", cursor)
+				}
+			}
+		case nul:
+			return 0, errors.ErrUnexpectedEndOfJSON("object of object", cursor)
+		}
+	SWITCH_OUT:
+		cursor++
+	}
+}
+
+func skipArray(buf []byte, cursor, depth int64) (int64, error) { // skipArray scans forward to the ']' that balances the array and returns the index just past it.
+	bracketCount := 1 // starts at 1: skipValue calls this with cursor already past the opening '['
+	for {
+		switch buf[cursor] {
+		case '[':
+			bracketCount++
+			depth++
+			if depth > maxDecodeNestingDepth {
+				return 0, errors.ErrExceededMaxDepth(buf[cursor], cursor)
+			}
+		case ']':
+			bracketCount--
+			depth--
+			if bracketCount == 0 { // the bracket balancing the initial '[' was found
+				return cursor + 1, nil
+			}
+		case '{':
+			depth++ // objects count toward the nesting depth but not toward bracketCount
+			if depth > maxDecodeNestingDepth {
+				return 0, errors.ErrExceededMaxDepth(buf[cursor], cursor)
+			}
+		case '}':
+			depth--
+		case '"':
+			for { // walk the string body so braces/brackets inside it are not counted
+				cursor++
+				switch buf[cursor] {
+				case '\\':
+					cursor++ // step over the escaped byte
+					if buf[cursor] == nul {
+						return 0, errors.ErrUnexpectedEndOfJSON("string of object", cursor)
+					}
+				case '"':
+					goto SWITCH_OUT // closing quote; the cursor++ after the label moves past it
+				case nul: // a nul byte is treated as end of input
+					return 0, errors.ErrUnexpectedEndOfJSON("string of object", cursor)
+				}
+			}
+		case nul:
+			return 0, errors.ErrUnexpectedEndOfJSON("array of object", cursor)
+		}
+	SWITCH_OUT:
+		cursor++
+	}
+}
+
+func skipValue(buf []byte, cursor, depth int64) (int64, error) { // skipValue skips leading whitespace plus one JSON value and returns the index just past that value.
+	for {
+		switch buf[cursor] {
+		case ' ', '\t', '\n', '\r':
+			cursor++
+			continue
+		case '{':
+			return skipObject(buf, cursor+1, depth+1) // hand over with cursor past '{' and depth bumped
+		case '[':
+			return skipArray(buf, cursor+1, depth+1) // hand over with cursor past '[' and depth bumped
+		case '"':
+			for {
+				cursor++
+				switch buf[cursor] {
+				case '\\':
+					cursor++ // step over the escaped byte
+					if buf[cursor] == nul {
+						return 0, errors.ErrUnexpectedEndOfJSON("string of object", cursor)
+					}
+				case '"':
+					return cursor + 1, nil
+				case nul: // a nul byte is treated as end of input
+					return 0, errors.ErrUnexpectedEndOfJSON("string of object", cursor)
+				}
+			}
+		case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
+			for { // consume every following byte flagged in floatTable; number grammar is not validated here
+				cursor++
+				if floatTable[buf[cursor]] {
+					continue
+				}
+				break
+			}
+			return cursor, nil
+		case 't':
+			if err := validateTrue(buf, cursor); err != nil {
+				return 0, err
+			}
+			cursor += 4 // len("true")
+			return cursor, nil
+		case 'f':
+			if err := validateFalse(buf, cursor); err != nil {
+				return 0, err
+			}
+			cursor += 5 // len("false")
+			return cursor, nil
+		case 'n':
+			if err := validateNull(buf, cursor); err != nil {
+				return 0, err
+			}
+			cursor += 4 // len("null")
+			return cursor, nil
+		default:
+			return cursor, errors.ErrUnexpectedEndOfJSON("null", cursor) // NOTE(review): every other byte (nul included) is reported with the label "null"
+		}
+	}
+}
+
+// validateTrue checks that buf[cursor+1..cursor+3] spells "rue"; buf[cursor] itself is not inspected.
+func validateTrue(buf []byte, cursor int64) error {
+	if last := cursor + 3; last >= int64(len(buf)) {
+		return errors.ErrUnexpectedEndOfJSON("true", cursor)
+	}
+	switch {
+	case buf[cursor+1] != 'r':
+		return errors.ErrInvalidCharacter(buf[cursor+1], "true", cursor)
+	case buf[cursor+2] != 'u':
+		return errors.ErrInvalidCharacter(buf[cursor+2], "true", cursor)
+	case buf[cursor+3] != 'e':
+		return errors.ErrInvalidCharacter(buf[cursor+3], "true", cursor)
+	}
+	return nil
+}
+
+// validateFalse checks that buf[cursor+1..cursor+4] spells "alse"; buf[cursor]
+// itself is not inspected. The first mismatching byte is reported.
+func validateFalse(buf []byte, cursor int64) error {
+	if last := cursor + 4; last >= int64(len(buf)) {
+		return errors.ErrUnexpectedEndOfJSON("false", cursor)
+	}
+	switch {
+	case buf[cursor+1] != 'a':
+		return errors.ErrInvalidCharacter(buf[cursor+1], "false", cursor)
+	case buf[cursor+2] != 'l':
+		return errors.ErrInvalidCharacter(buf[cursor+2], "false", cursor)
+	case buf[cursor+3] != 's':
+		return errors.ErrInvalidCharacter(buf[cursor+3], "false", cursor)
+	case buf[cursor+4] != 'e':
+		return errors.ErrInvalidCharacter(buf[cursor+4], "false", cursor)
+	}
+	return nil
+}
+
+// validateNull checks that buf[cursor+1..cursor+3] spells "ull"; buf[cursor] itself is not inspected.
+func validateNull(buf []byte, cursor int64) error {
+	if last := cursor + 3; last >= int64(len(buf)) {
+		return errors.ErrUnexpectedEndOfJSON("null", cursor)
+	}
+	switch {
+	case buf[cursor+1] != 'u':
+		return errors.ErrInvalidCharacter(buf[cursor+1], "null", cursor)
+	case buf[cursor+2] != 'l':
+		return errors.ErrInvalidCharacter(buf[cursor+2], "null", cursor)
+	case buf[cursor+3] != 'l':
+		return errors.ErrInvalidCharacter(buf[cursor+3], "null", cursor)
+	}
+	return nil
+}
diff --git a/vendor/github.com/goccy/go-json/internal/decoder/float.go b/vendor/github.com/goccy/go-json/internal/decoder/float.go
new file mode 100644
index 00000000..9b2eb8b3
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/internal/decoder/float.go
@@ -0,0 +1,170 @@
+package decoder
+
+import (
+	"strconv"
+	"unsafe"
+
+	"github.com/goccy/go-json/internal/errors"
+)
+
+type floatDecoder struct { // floatDecoder parses a JSON number as float64 and stores it through op.
+	op         func(unsafe.Pointer, float64) // called with the destination pointer and the parsed value
+	structName string
+	fieldName  string
+}
+
+func newFloatDecoder(structName, fieldName string, op func(unsafe.Pointer, float64)) *floatDecoder { // op is the store callback invoked with each parsed value
+	return &floatDecoder{op: op, structName: structName, fieldName: fieldName}
+}
+
+var (
+	floatTable = [256]bool{ // bytes that may continue a number after its first byte: digits, '.', 'e', 'E', '+', '-'
+		'0': true,
+		'1': true,
+		'2': true,
+		'3': true,
+		'4': true,
+		'5': true,
+		'6': true,
+		'7': true,
+		'8': true,
+		'9': true,
+		'.': true,
+		'e': true,
+		'E': true,
+		'+': true,
+		'-': true,
+	}
+
+	validEndNumberChar = [256]bool{ // bytes floatDecoder.Decode accepts immediately after a number
+		nul:  true,
+		' ':  true,
+		'\t': true,
+		'\r': true,
+		'\n': true,
+		',':  true,
+		':':  true,
+		'}':  true,
+		']':  true,
+	}
+)
+
+func floatBytes(s *Stream) []byte { // floatBytes consumes the number starting at s.cursor and returns it as a sub-slice of s.buf.
+	start := s.cursor // the byte at start is not re-checked here; callers have already matched it
+	for {
+		s.cursor++
+		if floatTable[s.char()] {
+			continue
+		} else if s.char() == nul {
+			if s.read() { // presumably read() pulls more input into the buffer — confirm in Stream
+				s.cursor-- // step back so the loop's s.cursor++ re-examines the current position
+				continue
+			}
+		}
+		break
+	}
+	return s.buf[start:s.cursor]
+}
+
+func (d *floatDecoder) decodeStreamByte(s *Stream) ([]byte, error) { // returns the raw bytes of the next number in s, or (nil, nil) when the value is null
+	for {
+		switch s.char() {
+		case ' ', '\n', '\t', '\r':
+			s.cursor++
+			continue
+		case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
+			return floatBytes(s), nil
+		case 'n':
+			if err := nullBytes(s); err != nil {
+				return nil, err
+			}
+			return nil, nil // null: no bytes and no error
+		case nul:
+			if s.read() { // s.read() succeeded: re-examine the current position
+				continue
+			}
+			goto ERROR
+		default:
+			goto ERROR
+		}
+	}
+ERROR:
+	return nil, errors.ErrUnexpectedEndOfJSON("float", s.totalOffset())
+}
+
+func (d *floatDecoder) decodeByte(buf []byte, cursor int64) ([]byte, int64, error) { // returns the number's raw bytes and the cursor just past them; (nil, cursor, nil) for null
+	for {
+		switch buf[cursor] {
+		case ' ', '\n', '\t', '\r':
+			cursor++
+			continue
+		case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
+			start := cursor
+			cursor++
+			for floatTable[buf[cursor]] { // extend over every byte flagged in floatTable
+				cursor++
+			}
+			num := buf[start:cursor] // sub-slice of buf, not a copy
+			return num, cursor, nil
+		case 'n':
+			if err := validateNull(buf, cursor); err != nil {
+				return nil, 0, err
+			}
+			cursor += 4 // len("null")
+			return nil, cursor, nil
+		default:
+			return nil, 0, errors.ErrUnexpectedEndOfJSON("float", cursor)
+		}
+	}
+}
+
+func (d *floatDecoder) DecodeStream(s *Stream, depth int64, p unsafe.Pointer) error { // reads one number from s, parses it as float64 and stores it via d.op; depth is unused
+	bytes, err := d.decodeStreamByte(s)
+	if err != nil {
+		return err
+	}
+	if bytes == nil { // null: p is left untouched
+		return nil
+	}
+	str := *(*string)(unsafe.Pointer(&bytes)) // view the byte slice as a string without copying
+	f64, err := strconv.ParseFloat(str, 64)
+	if err != nil {
+		return errors.ErrSyntax(err.Error(), s.totalOffset())
+	}
+	d.op(p, f64)
+	return nil
+}
+
+func (d *floatDecoder) Decode(ctx *RuntimeContext, cursor, depth int64, p unsafe.Pointer) (int64, error) { // parses one number from ctx.Buf, stores it via d.op and returns the cursor just past it; depth is unused
+	buf := ctx.Buf
+	bytes, c, err := d.decodeByte(buf, cursor)
+	if err != nil {
+		return 0, err
+	}
+	if bytes == nil { // null: p is left untouched
+		return c, nil
+	}
+	cursor = c
+	if !validEndNumberChar[buf[cursor]] { // the byte right after the number must be an allowed terminator
+		return 0, errors.ErrUnexpectedEndOfJSON("float", cursor)
+	}
+	s := *(*string)(unsafe.Pointer(&bytes)) // view the byte slice as a string without copying
+	f64, err := strconv.ParseFloat(s, 64)
+	if err != nil {
+		return 0, errors.ErrSyntax(err.Error(), cursor)
+	}
+	d.op(p, f64)
+	return cursor, nil
+}
+
+// DecodePath returns the raw number bytes (or nullbytes for null) as a single-element result.
+func (d *floatDecoder) DecodePath(ctx *RuntimeContext, cursor, depth int64) ([][]byte, int64, error) {
+	raw, next, err := d.decodeByte(ctx.Buf, cursor)
+	switch {
+	case err != nil:
+		return nil, 0, err
+	case raw == nil:
+		return [][]byte{nullbytes}, next, nil
+	}
+	return [][]byte{raw}, next, nil
+}
diff --git a/vendor/github.com/goccy/go-json/internal/decoder/func.go b/vendor/github.com/goccy/go-json/internal/decoder/func.go
new file mode 100644
index 00000000..4cc12ca8
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/internal/decoder/func.go
@@ -0,0 +1,146 @@
+package decoder
+
+import (
+	"bytes"
+	"fmt"
+	"unsafe"
+
+	"github.com/goccy/go-json/internal/errors"
+	"github.com/goccy/go-json/internal/runtime"
+)
+
+type funcDecoder struct { // funcDecoder handles func-typed destinations: only null is accepted, every other value is a type error.
+	typ        *runtime.Type // reported as Type in the UnmarshalTypeError values
+	structName string
+	fieldName  string
+}
+
+// newFuncDecoder builds a funcDecoder for typ; structName and fieldName are stored as given.
+func newFuncDecoder(typ *runtime.Type, structName, fieldName string) *funcDecoder {
+	return &funcDecoder{typ: typ, structName: structName, fieldName: fieldName}
+}
+
+func (d *funcDecoder) DecodeStream(s *Stream, depth int64, p unsafe.Pointer) error { // skips one value in s; null clears *p, any other value kind yields an UnmarshalTypeError
+	s.skipWhiteSpace()
+	start := s.cursor
+	if err := s.skipValue(depth); err != nil {
+		return err
+	}
+	src := s.buf[start:s.cursor] // raw bytes of the skipped value
+	if len(src) > 0 {
+		switch src[0] { // classify the value by its first byte
+		case '"':
+			return &errors.UnmarshalTypeError{
+				Value:  "string",
+				Type:   runtime.RType2Type(d.typ),
+				Offset: s.totalOffset(),
+			}
+		case '[':
+			return &errors.UnmarshalTypeError{
+				Value:  "array",
+				Type:   runtime.RType2Type(d.typ),
+				Offset: s.totalOffset(),
+			}
+		case '{':
+			return &errors.UnmarshalTypeError{
+				Value:  "object",
+				Type:   runtime.RType2Type(d.typ),
+				Offset: s.totalOffset(),
+			}
+		case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
+			return &errors.UnmarshalTypeError{
+				Value:  "number",
+				Type:   runtime.RType2Type(d.typ),
+				Offset: s.totalOffset(),
+			}
+		case 'n':
+			if err := nullBytes(s); err != nil { // NOTE(review): s.cursor is already past the value after skipValue — confirm nullBytes expects that
+				return err
+			}
+			*(*unsafe.Pointer)(p) = nil
+			return nil
+		case 't':
+			if err := trueBytes(s); err == nil { // on error, control falls through to the final return below
+				return &errors.UnmarshalTypeError{
+					Value:  "boolean",
+					Type:   runtime.RType2Type(d.typ),
+					Offset: s.totalOffset(),
+				}
+			}
+		case 'f':
+			if err := falseBytes(s); err == nil { // on error, control falls through to the final return below
+				return &errors.UnmarshalTypeError{
+					Value:  "boolean",
+					Type:   runtime.RType2Type(d.typ),
+					Offset: s.totalOffset(),
+				}
+			}
+		}
+	}
+	return errors.ErrInvalidBeginningOfValue(s.buf[s.cursor], s.totalOffset())
+}
+
+func (d *funcDecoder) Decode(ctx *RuntimeContext, cursor, depth int64, p unsafe.Pointer) (int64, error) { // skips one value in ctx.Buf; null clears *p, any other value kind yields an UnmarshalTypeError
+	buf := ctx.Buf
+	cursor = skipWhiteSpace(buf, cursor)
+	start := cursor
+	end, err := skipValue(buf, cursor, depth)
+	if err != nil {
+		return 0, err
+	}
+	src := buf[start:end] // raw bytes of the skipped value
+	if len(src) > 0 {
+		switch src[0] { // classify the value by its first byte
+		case '"':
+			return 0, &errors.UnmarshalTypeError{
+				Value:  "string",
+				Type:   runtime.RType2Type(d.typ),
+				Offset: start,
+			}
+		case '[':
+			return 0, &errors.UnmarshalTypeError{
+				Value:  "array",
+				Type:   runtime.RType2Type(d.typ),
+				Offset: start,
+			}
+		case '{':
+			return 0, &errors.UnmarshalTypeError{
+				Value:  "object",
+				Type:   runtime.RType2Type(d.typ),
+				Offset: start,
+			}
+		case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
+			return 0, &errors.UnmarshalTypeError{
+				Value:  "number",
+				Type:   runtime.RType2Type(d.typ),
+				Offset: start,
+			}
+		case 'n':
+			if bytes.Equal(src, nullbytes) { // the only accepted input: null clears the destination
+				*(*unsafe.Pointer)(p) = nil
+				return end, nil
+			}
+		case 't':
+			if err := validateTrue(buf, start); err == nil { // on error, control falls through to the final return below
+				return 0, &errors.UnmarshalTypeError{
+					Value:  "boolean",
+					Type:   runtime.RType2Type(d.typ),
+					Offset: start,
+				}
+			}
+		case 'f':
+			if err := validateFalse(buf, start); err == nil { // on error, control falls through to the final return below
+				return 0, &errors.UnmarshalTypeError{
+					Value:  "boolean",
+					Type:   runtime.RType2Type(d.typ),
+					Offset: start,
+				}
+			}
+		}
+	}
+	return cursor, errors.ErrInvalidBeginningOfValue(buf[cursor], cursor) // cursor still equals start here
+}
+
+func (d *funcDecoder) DecodePath(ctx *RuntimeContext, cursor, depth int64) ([][]byte, int64, error) { // path decoding is not supported for funcs; always returns an error
+	return nil, 0, fmt.Errorf("json: func decoder does not support decode path")
+}
diff --git a/vendor/github.com/goccy/go-json/internal/decoder/int.go b/vendor/github.com/goccy/go-json/internal/decoder/int.go
new file mode 100644
index 00000000..1a7f0819
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/internal/decoder/int.go
@@ -0,0 +1,246 @@
+package decoder
+
+import (
+	"fmt"
+	"reflect"
+	"unsafe"
+
+	"github.com/goccy/go-json/internal/errors"
+	"github.com/goccy/go-json/internal/runtime"
+)
+
+type intDecoder struct { // intDecoder parses a JSON integer and stores it through op after a kind-specific range check.
+	typ        *runtime.Type                 // reported as Type in typeError
+	kind       reflect.Kind                  // typ.Kind(), cached by newIntDecoder; selects the range check
+	op         func(unsafe.Pointer, int64)   // called with the destination pointer and the parsed value
+	structName string
+	fieldName  string
+}
+
+func newIntDecoder(typ *runtime.Type, structName, fieldName string, op func(unsafe.Pointer, int64)) *intDecoder { // op is the store callback invoked with each parsed value
+	return &intDecoder{
+		typ:        typ,
+		kind:       typ.Kind(), // computed once here rather than on every decode
+		op:         op,
+		structName: structName,
+		fieldName:  fieldName,
+	}
+}
+
+func (d *intDecoder) typeError(buf []byte, offset int64) *errors.UnmarshalTypeError { // builds the type error for this field; buf is the offending input text
+	return &errors.UnmarshalTypeError{
+		Value:  fmt.Sprintf("number %s", string(buf)),
+		Type:   runtime.RType2Type(d.typ),
+		Struct: d.structName,
+		Field:  d.fieldName,
+		Offset: offset,
+	}
+}
+
+var (
+	pow10i64 = [...]int64{ // pow10i64[i] == 10^i for i in 0..18 (19 entries)
+		1e00, 1e01, 1e02, 1e03, 1e04, 1e05, 1e06, 1e07, 1e08, 1e09,
+		1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18,
+	}
+	pow10i64Len = len(pow10i64) // maximum digit count accepted by parseInt
+)
+
+func (d *intDecoder) parseInt(b []byte) (int64, error) { // converts an optionally '-'-prefixed digit run to int64; bytes are not checked to be digits here
+	isNegative := false
+	if b[0] == '-' { // b must be non-empty: b[0] is read unconditionally
+		b = b[1:]
+		isNegative = true
+	}
+	maxDigit := len(b)
+	if maxDigit > pow10i64Len { // more digits than pow10i64 has entries
+		return 0, fmt.Errorf("invalid length of number")
+	}
+	sum := int64(0)
+	for i := 0; i < maxDigit; i++ {
+		c := int64(b[i]) - 48 // 48 is ASCII '0'
+		digitValue := pow10i64[maxDigit-i-1] // place value of digit i, most significant first
+		sum += c * digitValue // NOTE(review): no overflow check; a 19-digit magnitude above math.MaxInt64 wraps around
+	}
+	if isNegative {
+		return -1 * sum, nil
+	}
+	return sum, nil
+}
+
+var (
+	numTable = [256]bool{ // true for the ASCII digits '0'..'9' only
+		'0': true,
+		'1': true,
+		'2': true,
+		'3': true,
+		'4': true,
+		'5': true,
+		'6': true,
+		'7': true,
+		'8': true,
+		'9': true,
+	}
+)
+
+var (
+	numZeroBuf = []byte{'0'} // shared slice returned by intDecoder for a value that starts with '0'
+)
+
+func (d *intDecoder) decodeStreamByte(s *Stream) ([]byte, error) { // returns the raw bytes of the next integer in s, or (nil, nil) when the value is null
+	for {
+		switch s.char() {
+		case ' ', '\n', '\t', '\r':
+			s.cursor++
+			continue
+		case '-':
+			start := s.cursor
+			for {
+				s.cursor++
+				if numTable[s.char()] {
+					continue
+				} else if s.char() == nul {
+					if s.read() {
+						s.cursor-- // step back so the loop's s.cursor++ re-examines the current position
+						continue
+					}
+				}
+				break
+			}
+			num := s.buf[start:s.cursor]
+			if len(num) < 2 { // a lone '-' with no digits is rejected
+				goto ERROR
+			}
+			return num, nil
+		case '0':
+			s.cursor++ // only the single '0' is consumed; any following digits are left in the stream
+			return numZeroBuf, nil
+		case '1', '2', '3', '4', '5', '6', '7', '8', '9':
+			start := s.cursor
+			for {
+				s.cursor++
+				if numTable[s.char()] {
+					continue
+				} else if s.char() == nul {
+					if s.read() {
+						s.cursor-- // step back so the loop's s.cursor++ re-examines the current position
+						continue
+					}
+				}
+				break
+			}
+			num := s.buf[start:s.cursor]
+			return num, nil
+		case 'n':
+			if err := nullBytes(s); err != nil {
+				return nil, err
+			}
+			return nil, nil // null: no bytes and no error
+		case nul:
+			if s.read() { // s.read() succeeded: re-examine the current position
+				continue
+			}
+			goto ERROR
+		default:
+			return nil, d.typeError([]byte{s.char()}, s.totalOffset())
+		}
+	}
+ERROR:
+	return nil, errors.ErrUnexpectedEndOfJSON("number(integer)", s.totalOffset())
+}
+
+func (d *intDecoder) decodeByte(buf []byte, cursor int64) ([]byte, int64, error) { // returns the integer's raw bytes and the cursor just past them; (nil, cursor, nil) for null
+	b := (*sliceHeader)(unsafe.Pointer(&buf)).data // raw data pointer of buf; char(b, i) reads it without bounds checks
+	for {
+		switch char(b, cursor) {
+		case ' ', '\n', '\t', '\r':
+			cursor++
+			continue
+		case '0':
+			cursor++ // only the single '0' is consumed
+			return numZeroBuf, cursor, nil
+		case '-', '1', '2', '3', '4', '5', '6', '7', '8', '9':
+			start := cursor
+			cursor++
+			for numTable[char(b, cursor)] {
+				cursor++
+			}
+			num := buf[start:cursor] // NOTE(review): a lone '-' is returned here; unlike decodeStreamByte there is no len(num) < 2 check
+			return num, cursor, nil
+		case 'n':
+			if err := validateNull(buf, cursor); err != nil {
+				return nil, 0, err
+			}
+			cursor += 4 // len("null")
+			return nil, cursor, nil
+		default:
+			return nil, 0, d.typeError([]byte{char(b, cursor)}, cursor)
+		}
+	}
+}
+
+func (d *intDecoder) DecodeStream(s *Stream, depth int64, p unsafe.Pointer) error { // reads one integer from s, range-checks it for d.kind and stores it via d.op; depth is unused
+	bytes, err := d.decodeStreamByte(s)
+	if err != nil {
+		return err
+	}
+	if bytes == nil { // null: p is left untouched
+		return nil
+	}
+	i64, err := d.parseInt(bytes)
+	if err != nil {
+		return d.typeError(bytes, s.totalOffset())
+	}
+	switch d.kind { // only Int8/Int16/Int32 are range-checked; other kinds pass through
+	case reflect.Int8:
+		if i64 < -1*(1<<7) || (1<<7) <= i64 {
+			return d.typeError(bytes, s.totalOffset())
+		}
+	case reflect.Int16:
+		if i64 < -1*(1<<15) || (1<<15) <= i64 {
+			return d.typeError(bytes, s.totalOffset())
+		}
+	case reflect.Int32:
+		if i64 < -1*(1<<31) || (1<<31) <= i64 {
+			return d.typeError(bytes, s.totalOffset())
+		}
+	}
+	d.op(p, i64)
+	s.reset() // called on the success path only
+	return nil
+}
+
+func (d *intDecoder) Decode(ctx *RuntimeContext, cursor, depth int64, p unsafe.Pointer) (int64, error) { // parses one integer from ctx.Buf, range-checks it for d.kind, stores it via d.op; depth is unused
+	bytes, c, err := d.decodeByte(ctx.Buf, cursor)
+	if err != nil {
+		return 0, err
+	}
+	if bytes == nil { // null: p is left untouched
+		return c, nil
+	}
+	cursor = c
+
+	i64, err := d.parseInt(bytes)
+	if err != nil {
+		return 0, d.typeError(bytes, cursor)
+	}
+	switch d.kind { // only Int8/Int16/Int32 are range-checked; other kinds pass through
+	case reflect.Int8:
+		if i64 < -1*(1<<7) || (1<<7) <= i64 {
+			return 0, d.typeError(bytes, cursor)
+		}
+	case reflect.Int16:
+		if i64 < -1*(1<<15) || (1<<15) <= i64 {
+			return 0, d.typeError(bytes, cursor)
+		}
+	case reflect.Int32:
+		if i64 < -1*(1<<31) || (1<<31) <= i64 {
+			return 0, d.typeError(bytes, cursor)
+		}
+	}
+	d.op(p, i64)
+	return cursor, nil
+}
+
+func (d *intDecoder) DecodePath(ctx *RuntimeContext, cursor, depth int64) ([][]byte, int64, error) { // path decoding is not supported for ints; always returns an error
+	return nil, 0, fmt.Errorf("json: int decoder does not support decode path")
+}
diff --git a/vendor/github.com/goccy/go-json/internal/decoder/interface.go b/vendor/github.com/goccy/go-json/internal/decoder/interface.go
new file mode 100644
index 00000000..45c69ab8
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/internal/decoder/interface.go
@@ -0,0 +1,528 @@
+package decoder
+
+import (
+	"bytes"
+	"encoding"
+	"encoding/json"
+	"reflect"
+	"unsafe"
+
+	"github.com/goccy/go-json/internal/errors"
+	"github.com/goccy/go-json/internal/runtime"
+)
+
+type interfaceDecoder struct { // interfaceDecoder decodes into interface-typed destinations, dispatching on the first byte of the value.
+	typ           *runtime.Type // the interface type being decoded into
+	structName    string
+	fieldName     string
+	sliceDecoder  *sliceDecoder  // used for '[' values ([]interface{})
+	mapDecoder    *mapDecoder    // used for '{' values (map[string]interface{})
+	floatDecoder  *floatDecoder  // used for numbers (stores float64)
+	numberDecoder *numberDecoder // used for numbers on the stream path when s.UseNumber is set
+	stringDecoder *stringDecoder // used for '"' values on the buffer path
+}
+
+func newEmptyInterfaceDecoder(structName, fieldName string) *interfaceDecoder { // builds the decoder for interface{} destinations
+	ifaceDecoder := &interfaceDecoder{
+		typ:        emptyInterfaceType,
+		structName: structName,
+		fieldName:  fieldName,
+		floatDecoder: newFloatDecoder(structName, fieldName, func(p unsafe.Pointer, v float64) {
+			*(*interface{})(p) = v // store the float64 boxed in the destination interface{}
+		}),
+		numberDecoder: newNumberDecoder(structName, fieldName, func(p unsafe.Pointer, v json.Number) {
+			*(*interface{})(p) = v // store the json.Number boxed in the destination interface{}
+		}),
+		stringDecoder: newStringDecoder(structName, fieldName),
+	}
+	ifaceDecoder.sliceDecoder = newSliceDecoder( // assigned after construction because the element decoder is ifaceDecoder itself
+		ifaceDecoder,
+		emptyInterfaceType,
+		emptyInterfaceType.Size(),
+		structName, fieldName,
+	)
+	ifaceDecoder.mapDecoder = newMapDecoder( // likewise: values are decoded by ifaceDecoder, keys by its stringDecoder
+		interfaceMapType,
+		stringType,
+		ifaceDecoder.stringDecoder,
+		interfaceMapType.Elem(),
+		ifaceDecoder,
+		structName,
+		fieldName,
+	)
+	return ifaceDecoder
+}
+
+func newInterfaceDecoder(typ *runtime.Type, structName, fieldName string) *interfaceDecoder { // builds the decoder for the interface type typ
+	emptyIfaceDecoder := newEmptyInterfaceDecoder(structName, fieldName) // element/value decoder for nested slices and maps
+	stringDecoder := newStringDecoder(structName, fieldName)             // shared by mapDecoder (keys) and the stringDecoder field
+	return &interfaceDecoder{
+		typ:        typ,
+		structName: structName,
+		fieldName:  fieldName,
+		sliceDecoder: newSliceDecoder(
+			emptyIfaceDecoder,
+			emptyInterfaceType,
+			emptyInterfaceType.Size(),
+			structName, fieldName,
+		),
+		mapDecoder: newMapDecoder(
+			interfaceMapType,
+			stringType,
+			stringDecoder,
+			interfaceMapType.Elem(),
+			emptyIfaceDecoder,
+			structName,
+			fieldName,
+		),
+		floatDecoder: newFloatDecoder(structName, fieldName, func(p unsafe.Pointer, v float64) {
+			*(*interface{})(p) = v // store the float64 boxed in the destination interface{}
+		}),
+		numberDecoder: newNumberDecoder(structName, fieldName, func(p unsafe.Pointer, v json.Number) {
+			*(*interface{})(p) = v // store the json.Number boxed in the destination interface{}
+		}),
+		stringDecoder: stringDecoder,
+	}
+}
+
+func (d *interfaceDecoder) numDecoder(s *Stream) Decoder { // picks the number decoder for s: numberDecoder when s.UseNumber, else floatDecoder
+	if !s.UseNumber {
+		return d.floatDecoder
+	}
+	return d.numberDecoder
+}
+
+var (
+	emptyInterfaceType = runtime.Type2RType(reflect.TypeOf((*interface{})(nil)).Elem()) // runtime type of interface{}
+	EmptyInterfaceType = emptyInterfaceType                                             // exported alias of the same value
+	interfaceMapType   = runtime.Type2RType( // runtime type of map[string]interface{}
+		reflect.TypeOf((*map[string]interface{})(nil)).Elem(),
+	)
+	stringType = runtime.Type2RType( // runtime type of string
+		reflect.TypeOf(""),
+	)
+)
+
+// decodeStreamUnmarshaler skips one value in s (starting at s.cursor) and
+// feeds a copy of its raw bytes to unmarshaler.UnmarshalJSON.
+func decodeStreamUnmarshaler(s *Stream, depth int64, unmarshaler json.Unmarshaler) error {
+	begin := s.cursor
+	if err := s.skipValue(depth); err != nil {
+		return err
+	}
+	// Hand over a copy rather than a view into the stream buffer.
+	src := s.buf[begin:s.cursor]
+	raw := make([]byte, len(src))
+	copy(raw, src)
+	return unmarshaler.UnmarshalJSON(raw)
+}
+
+// decodeStreamUnmarshalerContext skips one value in s and feeds a copy of its
+// raw bytes, together with s.Option.Context, to unmarshaler.UnmarshalJSON.
+func decodeStreamUnmarshalerContext(s *Stream, depth int64, unmarshaler unmarshalerContext) error {
+	begin := s.cursor
+	if err := s.skipValue(depth); err != nil {
+		return err
+	}
+	// Hand over a copy rather than a view into the stream buffer.
+	src := s.buf[begin:s.cursor]
+	raw := make([]byte, len(src))
+	copy(raw, src)
+	return unmarshaler.UnmarshalJSON(s.Option.Context, raw)
+}
+
+// decodeUnmarshaler skips whitespace and one value in buf, feeds a copy of the
+// value's raw bytes to unmarshaler.UnmarshalJSON and returns the end cursor.
+func decodeUnmarshaler(buf []byte, cursor, depth int64, unmarshaler json.Unmarshaler) (int64, error) {
+	begin := skipWhiteSpace(buf, cursor)
+	end, err := skipValue(buf, begin, depth)
+	if err != nil {
+		return 0, err
+	}
+	src := buf[begin:end]
+	raw := make([]byte, len(src))
+	copy(raw, src)
+	if err := unmarshaler.UnmarshalJSON(raw); err != nil {
+		return 0, err
+	}
+	return end, nil
+}
+
+// decodeUnmarshalerContext is decodeUnmarshaler for context-aware unmarshalers:
+// ctx.Option.Context is passed along with the copied raw bytes.
+func decodeUnmarshalerContext(ctx *RuntimeContext, buf []byte, cursor, depth int64, unmarshaler unmarshalerContext) (int64, error) {
+	begin := skipWhiteSpace(buf, cursor)
+	end, err := skipValue(buf, begin, depth)
+	if err != nil {
+		return 0, err
+	}
+	src := buf[begin:end]
+	raw := make([]byte, len(src))
+	copy(raw, src)
+	if err := unmarshaler.UnmarshalJSON(ctx.Option.Context, raw); err != nil {
+		return 0, err
+	}
+	return end, nil
+}
+
+func decodeStreamTextUnmarshaler(s *Stream, depth int64, unmarshaler encoding.TextUnmarshaler, p unsafe.Pointer) error { // skips one value in s; null clears *p, anything else goes to UnmarshalText
+	start := s.cursor
+	if err := s.skipValue(depth); err != nil {
+		return err
+	}
+	src := s.buf[start:s.cursor] // raw bytes of the skipped value
+	if bytes.Equal(src, nullbytes) {
+		*(*unsafe.Pointer)(p) = nil
+		return nil
+	}
+
+	dst := make([]byte, len(src)) // copy so the unmarshaler does not see the stream buffer itself
+	copy(dst, src)
+
+	if err := unmarshaler.UnmarshalText(dst); err != nil { // NOTE(review): bytes are passed as-is; decodeTextUnmarshaler calls unquoteBytes first — confirm the difference is intended
+		return err
+	}
+	return nil
+}
+
+func decodeTextUnmarshaler(buf []byte, cursor, depth int64, unmarshaler encoding.TextUnmarshaler, p unsafe.Pointer) (int64, error) { // skips one value in buf; null clears *p, anything else goes to UnmarshalText
+	cursor = skipWhiteSpace(buf, cursor)
+	start := cursor
+	end, err := skipValue(buf, cursor, depth)
+	if err != nil {
+		return 0, err
+	}
+	src := buf[start:end] // raw bytes of the skipped value
+	if bytes.Equal(src, nullbytes) {
+		*(*unsafe.Pointer)(p) = nil
+		return end, nil
+	}
+	if s, ok := unquoteBytes(src); ok { // use the unquoted form when unquoteBytes succeeds; otherwise keep the raw bytes
+		src = s
+	}
+	if err := unmarshaler.UnmarshalText(src); err != nil {
+		return 0, err
+	}
+	return end, nil
+}
+
+func (d *interfaceDecoder) decodeStreamEmptyInterface(s *Stream, depth int64, p unsafe.Pointer) error { // decodes the next value of s into the interface{} at p, choosing the Go type from the first byte
+	c := s.skipWhiteSpace()
+	for {
+		switch c {
+		case '{':
+			var v map[string]interface{}
+			ptr := unsafe.Pointer(&v)
+			if err := d.mapDecoder.DecodeStream(s, depth, ptr); err != nil {
+				return err
+			}
+			*(*interface{})(p) = v
+			return nil
+		case '[':
+			var v []interface{}
+			ptr := unsafe.Pointer(&v)
+			if err := d.sliceDecoder.DecodeStream(s, depth, ptr); err != nil {
+				return err
+			}
+			*(*interface{})(p) = v
+			return nil
+		case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
+			return d.numDecoder(s).DecodeStream(s, depth, p) // json.Number or float64 depending on s.UseNumber
+		case '"':
+			s.cursor++ // step past the opening quote
+			start := s.cursor
+			for {
+				switch s.char() {
+				case '\\':
+					if _, err := decodeEscapeString(s, nil); err != nil { // presumably resolves the escape inside s.buf — confirm in decodeEscapeString
+						return err
+					}
+				case '"':
+					literal := s.buf[start:s.cursor]
+					s.cursor++ // step past the closing quote
+					*(*interface{})(p) = string(literal) // string() copies, so the stored value does not alias s.buf
+					return nil
+				case nul:
+					if s.read() {
+						continue
+					}
+					return errors.ErrUnexpectedEndOfJSON("string", s.totalOffset())
+				}
+				s.cursor++
+			}
+		case 't':
+			if err := trueBytes(s); err != nil {
+				return err
+			}
+			**(**interface{})(unsafe.Pointer(&p)) = true // same store as *(*interface{})(p) = true
+			return nil
+		case 'f':
+			if err := falseBytes(s); err != nil {
+				return err
+			}
+			**(**interface{})(unsafe.Pointer(&p)) = false // same store as *(*interface{})(p) = false
+			return nil
+		case 'n':
+			if err := nullBytes(s); err != nil {
+				return err
+			}
+			*(*interface{})(p) = nil
+			return nil
+		case nul:
+			if s.read() { // s.read() succeeded: re-dispatch on the current byte
+				c = s.char()
+				continue
+			}
+		}
+		break
+	}
+	return errors.ErrInvalidBeginningOfValue(c, s.totalOffset())
+}
+
+type emptyInterface struct { // overlaid on interface{} values through unsafe casts in DecodeStream and Decode
+	typ *runtime.Type  // type word
+	ptr unsafe.Pointer // data word
+}
+
+func (d *interfaceDecoder) DecodeStream(s *Stream, depth int64, p unsafe.Pointer) error { // decodes the next value of s into the interface value stored at p
+	runtimeInterfaceValue := *(*interface{})(unsafe.Pointer(&emptyInterface{ // fabricate an interface{} whose type word is d.typ and data word is p
+		typ: d.typ,
+		ptr: p,
+	}))
+	rv := reflect.ValueOf(runtimeInterfaceValue)
+	if rv.NumMethod() > 0 && rv.CanInterface() { // value has methods: try the unmarshaler interfaces in priority order
+		if u, ok := rv.Interface().(unmarshalerContext); ok {
+			return decodeStreamUnmarshalerContext(s, depth, u)
+		}
+		if u, ok := rv.Interface().(json.Unmarshaler); ok {
+			return decodeStreamUnmarshaler(s, depth, u)
+		}
+		if u, ok := rv.Interface().(encoding.TextUnmarshaler); ok {
+			return decodeStreamTextUnmarshaler(s, depth, u, p)
+		}
+		if s.skipWhiteSpace() == 'n' { // no unmarshaler matched: only null is accepted
+			if err := nullBytes(s); err != nil {
+				return err
+			}
+			*(*interface{})(p) = nil
+			return nil
+		}
+		return d.errUnmarshalType(rv.Type(), s.totalOffset())
+	}
+	iface := rv.Interface()
+	ifaceHeader := (*emptyInterface)(unsafe.Pointer(&iface)) // read back the type and data words of the value
+	typ := ifaceHeader.typ
+	if ifaceHeader.ptr == nil || d.typ == typ || typ == nil {
+		// concrete type is empty interface
+		return d.decodeStreamEmptyInterface(s, depth, p)
+	}
+	if typ.Kind() == reflect.Ptr && typ.Elem() == d.typ || typ.Kind() != reflect.Ptr { // && binds tighter than ||: pointer-to-d.typ, or any non-pointer
+		return d.decodeStreamEmptyInterface(s, depth, p)
+	}
+	if s.skipWhiteSpace() == 'n' { // remaining case is a pointer to some other type: null clears the interface
+		if err := nullBytes(s); err != nil {
+			return err
+		}
+		*(*interface{})(p) = nil
+		return nil
+	}
+	decoder, err := CompileToGetDecoder(typ)
+	if err != nil {
+		return err
+	}
+	return decoder.DecodeStream(s, depth, ifaceHeader.ptr) // decode through the existing pointer held by the interface
+}
+
+func (d *interfaceDecoder) errUnmarshalType(typ reflect.Type, offset int64) *errors.UnmarshalTypeError { // builds the type error for this field; Value is set to typ.String()
+	return &errors.UnmarshalTypeError{
+		Value:  typ.String(),
+		Type:   typ,
+		Offset: offset,
+		Struct: d.structName,
+		Field:  d.fieldName,
+	}
+}
+
+func (d *interfaceDecoder) Decode(ctx *RuntimeContext, cursor, depth int64, p unsafe.Pointer) (int64, error) { // decodes the value at cursor in ctx.Buf into the interface value stored at p
+	buf := ctx.Buf
+	runtimeInterfaceValue := *(*interface{})(unsafe.Pointer(&emptyInterface{ // fabricate an interface{} whose type word is d.typ and data word is p
+		typ: d.typ,
+		ptr: p,
+	}))
+	rv := reflect.ValueOf(runtimeInterfaceValue)
+	if rv.NumMethod() > 0 && rv.CanInterface() { // value has methods: try the unmarshaler interfaces in priority order
+		if u, ok := rv.Interface().(unmarshalerContext); ok {
+			return decodeUnmarshalerContext(ctx, buf, cursor, depth, u)
+		}
+		if u, ok := rv.Interface().(json.Unmarshaler); ok {
+			return decodeUnmarshaler(buf, cursor, depth, u)
+		}
+		if u, ok := rv.Interface().(encoding.TextUnmarshaler); ok {
+			return decodeTextUnmarshaler(buf, cursor, depth, u, p)
+		}
+		cursor = skipWhiteSpace(buf, cursor)
+		if buf[cursor] == 'n' { // no unmarshaler matched: only null is accepted
+			if err := validateNull(buf, cursor); err != nil {
+				return 0, err
+			}
+			cursor += 4 // len("null")
+			**(**interface{})(unsafe.Pointer(&p)) = nil // same store as *(*interface{})(p) = nil
+			return cursor, nil
+		}
+		return 0, d.errUnmarshalType(rv.Type(), cursor)
+	}
+
+	iface := rv.Interface()
+	ifaceHeader := (*emptyInterface)(unsafe.Pointer(&iface)) // read back the type and data words of the value
+	typ := ifaceHeader.typ
+	if ifaceHeader.ptr == nil || d.typ == typ || typ == nil {
+		// concrete type is empty interface
+		return d.decodeEmptyInterface(ctx, cursor, depth, p)
+	}
+	if typ.Kind() == reflect.Ptr && typ.Elem() == d.typ || typ.Kind() != reflect.Ptr { // && binds tighter than ||: pointer-to-d.typ, or any non-pointer
+		return d.decodeEmptyInterface(ctx, cursor, depth, p)
+	}
+	cursor = skipWhiteSpace(buf, cursor)
+	if buf[cursor] == 'n' { // remaining case is a pointer to some other type: null clears the interface
+		if err := validateNull(buf, cursor); err != nil {
+			return 0, err
+		}
+		cursor += 4 // len("null")
+		**(**interface{})(unsafe.Pointer(&p)) = nil // same store as *(*interface{})(p) = nil
+		return cursor, nil
+	}
+	decoder, err := CompileToGetDecoder(typ)
+	if err != nil {
+		return 0, err
+	}
+	return decoder.Decode(ctx, cursor, depth, ifaceHeader.ptr) // decode through the existing pointer held by the interface
+}
+
+func (d *interfaceDecoder) decodeEmptyInterface(ctx *RuntimeContext, cursor, depth int64, p unsafe.Pointer) (int64, error) { // decodes the value at cursor into the interface{} at p, choosing the Go type from the first byte
+	buf := ctx.Buf
+	cursor = skipWhiteSpace(buf, cursor)
+	switch buf[cursor] {
+	case '{':
+		var v map[string]interface{}
+		ptr := unsafe.Pointer(&v)
+		cursor, err := d.mapDecoder.Decode(ctx, cursor, depth, ptr) // := declares a case-local cursor shadowing the parameter
+		if err != nil {
+			return 0, err
+		}
+		**(**interface{})(unsafe.Pointer(&p)) = v // same store as *(*interface{})(p) = v
+		return cursor, nil
+	case '[':
+		var v []interface{}
+		ptr := unsafe.Pointer(&v)
+		cursor, err := d.sliceDecoder.Decode(ctx, cursor, depth, ptr) // case-local cursor, as above
+		if err != nil {
+			return 0, err
+		}
+		**(**interface{})(unsafe.Pointer(&p)) = v
+		return cursor, nil
+	case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
+		return d.floatDecoder.Decode(ctx, cursor, depth, p) // NOTE(review): always float64 here; the stream path consults s.UseNumber via numDecoder
+	case '"':
+		var v string
+		ptr := unsafe.Pointer(&v)
+		cursor, err := d.stringDecoder.Decode(ctx, cursor, depth, ptr) // case-local cursor, as above
+		if err != nil {
+			return 0, err
+		}
+		**(**interface{})(unsafe.Pointer(&p)) = v
+		return cursor, nil
+	case 't':
+		if err := validateTrue(buf, cursor); err != nil {
+			return 0, err
+		}
+		cursor += 4 // len("true")
+		**(**interface{})(unsafe.Pointer(&p)) = true
+		return cursor, nil
+	case 'f':
+		if err := validateFalse(buf, cursor); err != nil {
+			return 0, err
+		}
+		cursor += 5 // len("false")
+		**(**interface{})(unsafe.Pointer(&p)) = false
+		return cursor, nil
+	case 'n':
+		if err := validateNull(buf, cursor); err != nil {
+			return 0, err
+		}
+		cursor += 4 // len("null")
+		**(**interface{})(unsafe.Pointer(&p)) = nil
+		return cursor, nil
+	}
+	return cursor, errors.ErrInvalidBeginningOfValue(buf[cursor], cursor)
+}
+
+func NewPathDecoder() Decoder { // NewPathDecoder returns an interface{} decoder built with empty struct and field names.
+	ifaceDecoder := &interfaceDecoder{
+		typ:        emptyInterfaceType,
+		structName: "",
+		fieldName:  "",
+		floatDecoder: newFloatDecoder("", "", func(p unsafe.Pointer, v float64) {
+			*(*interface{})(p) = v // store the float64 boxed in the destination interface{}
+		}),
+		numberDecoder: newNumberDecoder("", "", func(p unsafe.Pointer, v json.Number) {
+			*(*interface{})(p) = v // store the json.Number boxed in the destination interface{}
+		}),
+		stringDecoder: newStringDecoder("", ""),
+	}
+	ifaceDecoder.sliceDecoder = newSliceDecoder( // assigned after construction because the element decoder is ifaceDecoder itself
+		ifaceDecoder,
+		emptyInterfaceType,
+		emptyInterfaceType.Size(),
+		"", "",
+	)
+	ifaceDecoder.mapDecoder = newMapDecoder( // likewise: values are decoded by ifaceDecoder, keys by its stringDecoder
+		interfaceMapType,
+		stringType,
+		ifaceDecoder.stringDecoder,
+		interfaceMapType.Elem(),
+		ifaceDecoder,
+		"", "",
+	)
+	return ifaceDecoder
+}
+
+var (
+	truebytes  = []byte("true")  // shared result returned by interfaceDecoder.DecodePath for a true literal
+	falsebytes = []byte("false") // shared result returned by interfaceDecoder.DecodePath for a false literal
+)
+
+func (d *interfaceDecoder) DecodePath(ctx *RuntimeContext, cursor, depth int64) ([][]byte, int64, error) { // returns the raw bytes of the value at cursor, delegating by first byte
+	buf := ctx.Buf
+	cursor = skipWhiteSpace(buf, cursor)
+	switch buf[cursor] {
+	case '{':
+		return d.mapDecoder.DecodePath(ctx, cursor, depth)
+	case '[':
+		return d.sliceDecoder.DecodePath(ctx, cursor, depth)
+	case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
+		return d.floatDecoder.DecodePath(ctx, cursor, depth)
+	case '"':
+		return d.stringDecoder.DecodePath(ctx, cursor, depth)
+	case 't':
+		if err := validateTrue(buf, cursor); err != nil {
+			return nil, 0, err
+		}
+		cursor += 4 // len("true")
+		return [][]byte{truebytes}, cursor, nil
+	case 'f':
+		if err := validateFalse(buf, cursor); err != nil {
+			return nil, 0, err
+		}
+		cursor += 5 // len("false")
+		return [][]byte{falsebytes}, cursor, nil
+	case 'n':
+		if err := validateNull(buf, cursor); err != nil {
+			return nil, 0, err
+		}
+		cursor += 4 // len("null")
+		return [][]byte{nullbytes}, cursor, nil
+	}
+	return nil, cursor, errors.ErrInvalidBeginningOfValue(buf[cursor], cursor)
+}
diff --git a/vendor/github.com/goccy/go-json/internal/decoder/invalid.go b/vendor/github.com/goccy/go-json/internal/decoder/invalid.go
new file mode 100644
index 00000000..4c9721b0
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/internal/decoder/invalid.go
@@ -0,0 +1,55 @@
+package decoder
+
+import (
+	"reflect"
+	"unsafe"
+
+	"github.com/goccy/go-json/internal/errors"
+	"github.com/goccy/go-json/internal/runtime"
+)
+
+type invalidDecoder struct { // decoder whose DecodeStream/Decode/DecodePath always return an UnmarshalTypeError
+	typ        *runtime.Type // reported as the Type of the error
+	kind       reflect.Kind  // cached typ.Kind(); not read by the methods in this file
+	structName string        // reported as the Struct of the error
+	fieldName  string        // reported as the Field of the error
+}
+
+func newInvalidDecoder(typ *runtime.Type, structName, fieldName string) *invalidDecoder { // builds an invalidDecoder for typ, recording where it sits for error messages
+	return &invalidDecoder{
+		typ:        typ,
+		kind:       typ.Kind(),
+		structName: structName,
+		fieldName:  fieldName,
+	}
+}
+
+func (d *invalidDecoder) DecodeStream(s *Stream, depth int64, p unsafe.Pointer) error { // never decodes; always returns an UnmarshalTypeError
+	return &errors.UnmarshalTypeError{
+		Value:  "object",
+		Type:   runtime.RType2Type(d.typ),
+		Offset: s.totalOffset(), // offset is taken from the stream position
+		Struct: d.structName,
+		Field:  d.fieldName,
+	}
+}
+
+func (d *invalidDecoder) Decode(ctx *RuntimeContext, cursor, depth int64, p unsafe.Pointer) (int64, error) { // never decodes; always returns 0 and an UnmarshalTypeError
+	return 0, &errors.UnmarshalTypeError{
+		Value:  "object",
+		Type:   runtime.RType2Type(d.typ),
+		Offset: cursor, // offset is the cursor the caller passed in
+		Struct: d.structName,
+		Field:  d.fieldName,
+	}
+}
+
+func (d *invalidDecoder) DecodePath(ctx *RuntimeContext, cursor, depth int64) ([][]byte, int64, error) { // never decodes; always returns an UnmarshalTypeError
+	return nil, 0, &errors.UnmarshalTypeError{
+		Value:  "object",
+		Type:   runtime.RType2Type(d.typ),
+		Offset: cursor, // offset is the cursor the caller passed in
+		Struct: d.structName,
+		Field:  d.fieldName,
+	}
+}
diff --git a/vendor/github.com/goccy/go-json/internal/decoder/map.go b/vendor/github.com/goccy/go-json/internal/decoder/map.go
new file mode 100644
index 00000000..07a9caea
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/internal/decoder/map.go
@@ -0,0 +1,280 @@
+package decoder
+
+import (
+	"reflect"
+	"unsafe"
+
+	"github.com/goccy/go-json/internal/errors"
+	"github.com/goccy/go-json/internal/runtime"
+)
+
+type mapDecoder struct { // decodes a JSON object into a Go map using separate key and value decoders
+	mapType                 *runtime.Type // passed to makemap and the mapassign helpers
+	keyType                 *runtime.Type // type allocated for each decoded key
+	valueType               *runtime.Type // type allocated for each decoded value
+	canUseAssignFaststrType bool          // result of canUseAssignFaststrType(keyType, valueType); selects the assignment path in mapassign
+	keyDecoder              Decoder
+	valueDecoder            Decoder
+	structName              string // used in DecodePath's UnmarshalTypeError
+	fieldName               string // used in DecodePath's UnmarshalTypeError
+}
+
+func newMapDecoder(mapType *runtime.Type, keyType *runtime.Type, keyDec Decoder, valueType *runtime.Type, valueDec Decoder, structName, fieldName string) *mapDecoder { // wires the types and decoders together and precomputes the fast-path flag
+	return &mapDecoder{
+		mapType:                 mapType,
+		keyDecoder:              keyDec,
+		keyType:                 keyType,
+		canUseAssignFaststrType: canUseAssignFaststrType(keyType, valueType), // decided once at construction, not per assignment
+		valueType:               valueType,
+		valueDecoder:            valueDec,
+		structName:              structName,
+		fieldName:               fieldName,
+	}
+}
+
+const (
+	mapMaxElemSize = 128 // value types larger than this (in bytes, per Type.Size) disable the mapassign_faststr path
+)
+
+// See detail: https://github.com/goccy/go-json/pull/283
+func canUseAssignFaststrType(key *runtime.Type, value *runtime.Type) bool { // true only for string-kind keys whose value size is at most mapMaxElemSize
+	indirectElem := value.Size() > mapMaxElemSize
+	if indirectElem {
+		return false
+	}
+	return key.Kind() == reflect.String
+}
+
+//go:linkname makemap reflect.makemap
+func makemap(*runtime.Type, int) unsafe.Pointer // body-less declaration linked to reflect.makemap
+
+//nolint:golint
+//go:linkname mapassign_faststr runtime.mapassign_faststr
+//go:noescape
+func mapassign_faststr(t *runtime.Type, m unsafe.Pointer, s string) unsafe.Pointer // body-less declaration linked to runtime.mapassign_faststr; the result is used as the value slot to write into
+
+//go:linkname mapassign reflect.mapassign
+//go:noescape
+func mapassign(t *runtime.Type, m unsafe.Pointer, k, v unsafe.Pointer) // body-less declaration linked to reflect.mapassign
+
+func (d *mapDecoder) mapassign(t *runtime.Type, m, k, v unsafe.Pointer) { // stores the value at v under the key at k in map m, choosing the path picked at construction
+	if d.canUseAssignFaststrType {
+		mapV := mapassign_faststr(t, m, *(*string)(k)) // k is read as a string on this path
+		typedmemmove(d.valueType, mapV, v)             // copy the decoded value into the returned slot
+	} else {
+		mapassign(t, m, k, v)
+	}
+}
+
+func (d *mapDecoder) DecodeStream(s *Stream, depth int64, p unsafe.Pointer) error { // decodes a JSON object (or null) from the stream into the map pointer stored at p
+	depth++
+	if depth > maxDecodeNestingDepth {
+		return errors.ErrExceededMaxDepth(s.char(), s.cursor)
+	}
+
+	switch s.skipWhiteSpace() {
+	case 'n':
+		if err := nullBytes(s); err != nil {
+			return err
+		}
+		**(**unsafe.Pointer)(unsafe.Pointer(&p)) = nil // writes nil through p; same store as *(*unsafe.Pointer)(p) = nil
+		return nil
+	case '{':
+	default:
+		return errors.ErrExpected("{ character for map value", s.totalOffset())
+	}
+	mapValue := *(*unsafe.Pointer)(p)
+	if mapValue == nil {
+		mapValue = makemap(d.mapType, 0) // only allocate when the destination holds no map yet; an existing map is added to
+	}
+	s.cursor++
+	if s.skipWhiteSpace() == '}' {
+		*(*unsafe.Pointer)(p) = mapValue // empty object: publish the (possibly new) map and stop
+		s.cursor++
+		return nil
+	}
+	for {
+		k := unsafe_New(d.keyType) // fresh key storage per entry
+		if err := d.keyDecoder.DecodeStream(s, depth, k); err != nil {
+			return err
+		}
+		s.skipWhiteSpace()
+		if !s.equalChar(':') {
+			return errors.ErrExpected("colon after object key", s.totalOffset())
+		}
+		s.cursor++
+		v := unsafe_New(d.valueType) // fresh value storage per entry
+		if err := d.valueDecoder.DecodeStream(s, depth, v); err != nil {
+			return err
+		}
+		d.mapassign(d.mapType, mapValue, k, v)
+		s.skipWhiteSpace()
+		if s.equalChar('}') {
+			**(**unsafe.Pointer)(unsafe.Pointer(&p)) = mapValue // publish the map only once the closing brace is seen
+			s.cursor++
+			return nil
+		}
+		if !s.equalChar(',') {
+			return errors.ErrExpected("comma after object value", s.totalOffset())
+		}
+		s.cursor++
+	}
+}
+
+func (d *mapDecoder) Decode(ctx *RuntimeContext, cursor, depth int64, p unsafe.Pointer) (int64, error) { // decodes a JSON object (or null) from ctx.Buf into the map pointer stored at p; returns the cursor after the value
+	buf := ctx.Buf
+	depth++
+	if depth > maxDecodeNestingDepth {
+		return 0, errors.ErrExceededMaxDepth(buf[cursor], cursor)
+	}
+
+	cursor = skipWhiteSpace(buf, cursor)
+	buflen := int64(len(buf))
+	if buflen < 2 {
+		return 0, errors.ErrExpected("{} for map", cursor)
+	}
+	switch buf[cursor] {
+	case 'n':
+		if err := validateNull(buf, cursor); err != nil {
+			return 0, err
+		}
+		cursor += 4 // len("null")
+		**(**unsafe.Pointer)(unsafe.Pointer(&p)) = nil // writes nil through p; same store as *(*unsafe.Pointer)(p) = nil
+		return cursor, nil
+	case '{':
+	default:
+		return 0, errors.ErrExpected("{ character for map value", cursor)
+	}
+	cursor++
+	cursor = skipWhiteSpace(buf, cursor)
+	mapValue := *(*unsafe.Pointer)(p)
+	if mapValue == nil {
+		mapValue = makemap(d.mapType, 0) // only allocate when the destination holds no map yet; an existing map is added to
+	}
+	if buf[cursor] == '}' {
+		**(**unsafe.Pointer)(unsafe.Pointer(&p)) = mapValue // empty object: publish the (possibly new) map and stop
+		cursor++
+		return cursor, nil
+	}
+	for {
+		k := unsafe_New(d.keyType) // fresh key storage per entry
+		keyCursor, err := d.keyDecoder.Decode(ctx, cursor, depth, k)
+		if err != nil {
+			return 0, err
+		}
+		cursor = skipWhiteSpace(buf, keyCursor)
+		if buf[cursor] != ':' {
+			return 0, errors.ErrExpected("colon after object key", cursor)
+		}
+		cursor++
+		v := unsafe_New(d.valueType) // fresh value storage per entry
+		valueCursor, err := d.valueDecoder.Decode(ctx, cursor, depth, v)
+		if err != nil {
+			return 0, err
+		}
+		d.mapassign(d.mapType, mapValue, k, v)
+		cursor = skipWhiteSpace(buf, valueCursor)
+		if buf[cursor] == '}' {
+			**(**unsafe.Pointer)(unsafe.Pointer(&p)) = mapValue // publish the map only once the closing brace is seen
+			cursor++
+			return cursor, nil
+		}
+		if buf[cursor] != ',' {
+			return 0, errors.ErrExpected("comma after object value", cursor)
+		}
+		cursor++
+	}
+}
+
+func (d *mapDecoder) DecodePath(ctx *RuntimeContext, cursor, depth int64) ([][]byte, int64, error) { // walks a JSON object and collects the raw bytes of values selected by ctx.Option.Path
+	buf := ctx.Buf
+	depth++
+	if depth > maxDecodeNestingDepth {
+		return nil, 0, errors.ErrExceededMaxDepth(buf[cursor], cursor)
+	}
+
+	cursor = skipWhiteSpace(buf, cursor)
+	buflen := int64(len(buf))
+	if buflen < 2 {
+		return nil, 0, errors.ErrExpected("{} for map", cursor)
+	}
+	switch buf[cursor] {
+	case 'n':
+		if err := validateNull(buf, cursor); err != nil {
+			return nil, 0, err
+		}
+		cursor += 4 // len("null")
+		return [][]byte{nullbytes}, cursor, nil
+	case '{':
+	default:
+		return nil, 0, errors.ErrExpected("{ character for map value", cursor)
+	}
+	cursor++
+	cursor = skipWhiteSpace(buf, cursor)
+	if buf[cursor] == '}' {
+		cursor++
+		return nil, cursor, nil // empty object: nothing can match
+	}
+	keyDecoder, ok := d.keyDecoder.(*stringDecoder) // path lookup needs the raw key bytes, which only stringDecoder.decodeByte provides
+	if !ok {
+		return nil, 0, &errors.UnmarshalTypeError{
+			Value:  "string",
+			Type:   reflect.TypeOf(""),
+			Offset: cursor,
+			Struct: d.structName,
+			Field:  d.fieldName,
+		}
+	}
+	ret := [][]byte{}
+	for {
+		key, keyCursor, err := keyDecoder.decodeByte(buf, cursor)
+		if err != nil {
+			return nil, 0, err
+		}
+		cursor = skipWhiteSpace(buf, keyCursor)
+		if buf[cursor] != ':' {
+			return nil, 0, errors.ErrExpected("colon after object key", cursor)
+		}
+		cursor++
+		child, found, err := ctx.Option.Path.Field(string(key))
+		if err != nil {
+			return nil, 0, err
+		}
+		if found {
+			if child != nil {
+				oldPath := ctx.Option.Path.node
+				ctx.Option.Path.node = child // descend: the value is decoded against the child path node
+				paths, c, err := d.valueDecoder.DecodePath(ctx, cursor, depth)
+				ctx.Option.Path.node = oldPath // restore before the error check so a failed decode cannot leave the shared Path pointing at the child
+				if err != nil {
+					return nil, 0, err
+				}
+				ret = append(ret, paths...)
+				cursor = c
+			} else {
+				start := cursor // no child node: this key is the target, so capture the raw value bytes
+				end, err := skipValue(buf, cursor, depth)
+				if err != nil {
+					return nil, 0, err
+				}
+				ret = append(ret, buf[start:end])
+				cursor = end
+			}
+		} else {
+			c, err := skipValue(buf, cursor, depth) // key not selected: skip over its value
+			if err != nil {
+				return nil, 0, err
+			}
+			cursor = c
+		}
+		cursor = skipWhiteSpace(buf, cursor)
+		if buf[cursor] == '}' {
+			cursor++
+			return ret, cursor, nil
+		}
+		if buf[cursor] != ',' {
+			return nil, 0, errors.ErrExpected("comma after object value", cursor)
+		}
+		cursor++
+	}
+}
diff --git a/vendor/github.com/goccy/go-json/internal/decoder/number.go b/vendor/github.com/goccy/go-json/internal/decoder/number.go
new file mode 100644
index 00000000..10e5435e
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/internal/decoder/number.go
@@ -0,0 +1,123 @@
+package decoder
+
+import (
+	"encoding/json"
+	"strconv"
+	"unsafe"
+
+	"github.com/goccy/go-json/internal/errors"
+)
+
+type numberDecoder struct { // decodes a JSON number (bare or quoted) and hands it to op as a json.Number
+	stringDecoder *stringDecoder                    // used when the number arrives inside double quotes
+	op            func(unsafe.Pointer, json.Number) // stores the decoded number at the destination pointer
+	structName    string
+	fieldName     string
+}
+
+func newNumberDecoder(structName, fieldName string, op func(unsafe.Pointer, json.Number)) *numberDecoder { // builds a numberDecoder with its own stringDecoder for quoted input
+	return &numberDecoder{
+		stringDecoder: newStringDecoder(structName, fieldName),
+		op:            op,
+		structName:    structName,
+		fieldName:     fieldName,
+	}
+}
+
+func (d *numberDecoder) DecodeStream(s *Stream, depth int64, p unsafe.Pointer) error { // reads a number from the stream, validates it and passes it to d.op
+	bytes, err := d.decodeStreamByte(s)
+	if err != nil {
+		return err
+	}
+	if _, err := strconv.ParseFloat(*(*string)(unsafe.Pointer(&bytes)), 64); err != nil { // validation only; NOTE(review): a JSON null yields nil bytes, which ParseFloat rejects — confirm null is meant to error here
+		return errors.ErrSyntax(err.Error(), s.totalOffset())
+	}
+	d.op(p, json.Number(string(bytes))) // string(bytes) copies, so the stored number does not alias the stream buffer
+	s.reset()
+	return nil
+}
+
+func (d *numberDecoder) Decode(ctx *RuntimeContext, cursor, depth int64, p unsafe.Pointer) (int64, error) { // reads a number from ctx.Buf, validates it and passes it to d.op; returns the cursor after it
+	bytes, c, err := d.decodeByte(ctx.Buf, cursor)
+	if err != nil {
+		return 0, err
+	}
+	if _, err := strconv.ParseFloat(*(*string)(unsafe.Pointer(&bytes)), 64); err != nil { // validation only; NOTE(review): a JSON null yields nil bytes, which ParseFloat rejects — confirm null is meant to error here
+		return 0, errors.ErrSyntax(err.Error(), c)
+	}
+	cursor = c
+	s := *(*string)(unsafe.Pointer(&bytes)) // reinterprets the slice header as a string without copying, so the number shares memory with bytes
+	d.op(p, json.Number(s))
+	return cursor, nil
+}
+
+func (d *numberDecoder) DecodePath(ctx *RuntimeContext, cursor, depth int64) ([][]byte, int64, error) { // returns the raw bytes of the number at cursor without validating or storing it
+	bytes, c, err := d.decodeByte(ctx.Buf, cursor)
+	if err != nil {
+		return nil, 0, err
+	}
+	if bytes == nil { // decodeByte returns nil bytes for a JSON null
+		return [][]byte{nullbytes}, c, nil
+	}
+	return [][]byte{bytes}, c, nil
+}
+
+func (d *numberDecoder) decodeStreamByte(s *Stream) ([]byte, error) { // scans the stream for a number, null or quoted string and returns its bytes (nil for null)
+	start := s.cursor
+	for {
+		switch s.char() {
+		case ' ', '\n', '\t', '\r':
+			s.cursor++
+			continue
+		case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
+			return floatBytes(s), nil
+		case 'n':
+			if err := nullBytes(s); err != nil {
+				return nil, err
+			}
+			return nil, nil // null is reported as nil bytes with no error
+		case '"':
+			return d.stringDecoder.decodeStreamByte(s)
+		case nul:
+			if s.read() { // more input became available: retry at the same position
+				continue
+			}
+			goto ERROR
+		default:
+			goto ERROR
+		}
+	}
+ERROR:
+	if s.cursor == start { // cursor never moved, so the very first byte was invalid
+		return nil, errors.ErrInvalidBeginningOfValue(s.char(), s.totalOffset())
+	}
+	return nil, errors.ErrUnexpectedEndOfJSON("json.Number", s.totalOffset())
+}
+
+func (d *numberDecoder) decodeByte(buf []byte, cursor int64) ([]byte, int64, error) { // scans buf for a number, null or quoted string; returns its bytes (nil for null) and the cursor after it
+	for {
+		switch buf[cursor] {
+		case ' ', '\n', '\t', '\r':
+			cursor++
+			continue
+		case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
+			start := cursor
+			cursor++
+			for floatTable[buf[cursor]] { // no explicit bounds check; presumably relies on buf ending in a byte outside floatTable — confirm with callers
+				cursor++
+			}
+			num := buf[start:cursor] // sub-slice of buf, not a copy
+			return num, cursor, nil
+		case 'n':
+			if err := validateNull(buf, cursor); err != nil {
+				return nil, 0, err
+			}
+			cursor += 4 // len("null")
+			return nil, cursor, nil
+		case '"':
+			return d.stringDecoder.decodeByte(buf, cursor)
+		default:
+			return nil, 0, errors.ErrUnexpectedEndOfJSON("json.Number", cursor)
+		}
+	}
+}
diff --git a/vendor/github.com/goccy/go-json/internal/decoder/option.go b/vendor/github.com/goccy/go-json/internal/decoder/option.go
new file mode 100644
index 00000000..502f772e
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/internal/decoder/option.go
@@ -0,0 +1,17 @@
+package decoder
+
+import "context"
+
+type OptionFlags uint8 // bit set of decode options
+
+const (
+	FirstWinOption OptionFlags = 1 << iota // 1
+	ContextOption                          // 2
+	PathOption                             // 4
+)
+
+type Option struct { // per-decode options carried on the runtime context
+	Flags   OptionFlags
+	Context context.Context
+	Path    *Path // consulted by the DecodePath methods via ctx.Option.Path
+}
diff --git a/vendor/github.com/goccy/go-json/internal/decoder/path.go b/vendor/github.com/goccy/go-json/internal/decoder/path.go
new file mode 100644
index 00000000..a15ff69e
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/internal/decoder/path.go
@@ -0,0 +1,670 @@
+package decoder
+
+import (
+	"fmt"
+	"reflect"
+	"strconv"
+
+	"github.com/goccy/go-json/internal/errors"
+	"github.com/goccy/go-json/internal/runtime"
+)
+
+type PathString string // textual JSON Path such as "$.a[0]"
+
+func (s PathString) Build() (*Path, error) { // parses s with a fresh PathBuilder
+	builder := new(PathBuilder)
+	return builder.Build([]rune(s)) // parsed rune by rune, so offsets are in runes, not bytes
+}
+
+type PathBuilder struct { // accumulates a chain of PathNodes while parsing a JSON Path
+	root                    PathNode // first node added; nil until something is added
+	node                    PathNode // most recently added node; new nodes are chained onto it
+	singleQuotePathSelector bool     // set when a ['name'] selector was parsed
+	doubleQuotePathSelector bool     // set when a "name" selector was parsed
+}
+
+func (b *PathBuilder) Build(buf []rune) (*Path, error) { // parses buf and wraps the resulting node chain in a Path
+	node, err := b.build(buf)
+	if err != nil {
+		return nil, err
+	}
+	return &Path{
+		node:                    node,
+		RootSelectorOnly:        node == nil, // build returns a nil node when the path is just "$"
+		SingleQuotePathSelector: b.singleQuotePathSelector,
+		DoubleQuotePathSelector: b.doubleQuotePathSelector,
+	}, nil
+}
+
+func (b *PathBuilder) build(buf []rune) (PathNode, error) { // validates the leading '$' and parses the rest; returns the root node, or nil for a bare "$"
+	if len(buf) == 0 {
+		return nil, errors.ErrEmptyPath()
+	}
+	if buf[0] != '$' {
+		return nil, errors.ErrInvalidPath("JSON Path must start with a $ character")
+	}
+	if len(buf) == 1 {
+		return nil, nil // "$" alone selects the root
+	}
+	buf = buf[1:]
+	offset, err := b.buildNext(buf)
+	if err != nil {
+		return nil, err
+	}
+	if len(buf) > offset {
+		return nil, errors.ErrInvalidPath("remain invalid path %q", string(buf[offset:])) // convert to string: %q on a []rune prints a bracketed list of quoted runes
+	}
+	return b.root, nil
+}
+
+func (b *PathBuilder) buildNextCharIfExists(buf []rune, cursor int) (int, error) { // continues parsing at buf[cursor:] if input remains; returns how many runes of buf were consumed
+	if len(buf) > cursor {
+		offset, err := b.buildNext(buf[cursor:])
+		if err != nil {
+			return 0, err
+		}
+		return cursor + offset, nil // buildNext's offset already counts its leading '.' or '[', so adding another 1 over-reported by one
+	}
+	return cursor, nil
+}
+
+func (b *PathBuilder) buildNext(buf []rune) (int, error) { // parses one path step starting at '.' or '['; callers guarantee buf is non-empty
+	switch buf[0] {
+	case '.':
+		if len(buf) == 1 {
+			return 0, errors.ErrInvalidPath("JSON Path ends with dot character")
+		}
+		offset, err := b.buildSelector(buf[1:])
+		if err != nil {
+			return 0, err
+		}
+		return offset + 1, nil // +1 for the '.'
+	case '[':
+		if len(buf) == 1 {
+			return 0, errors.ErrInvalidPath("JSON Path ends with left bracket character")
+		}
+		offset, err := b.buildIndex(buf[1:])
+		if err != nil {
+			return 0, err
+		}
+		return offset + 1, nil // +1 for the '['
+	default:
+		return 0, errors.ErrInvalidPath("expect dot or left bracket character. but found %c character", buf[0])
+	}
+}
+
+func (b *PathBuilder) buildSelector(buf []rune) (int, error) { // parses the field name that follows a '.', then whatever step comes after it
+	switch buf[0] {
+	case '.':
+		if len(buf) == 1 {
+			return 0, errors.ErrInvalidPath("JSON Path ends with double dot character")
+		}
+		offset, err := b.buildPathRecursive(buf[1:]) // ".." introduces a recursive selector
+		if err != nil {
+			return 0, err
+		}
+		return 1 + offset, nil
+	case '[', ']', '$', '*':
+		return 0, errors.ErrInvalidPath("found invalid path character %c after dot", buf[0])
+	}
+	for cursor := 0; cursor < len(buf); cursor++ {
+		switch buf[cursor] {
+		case '$', '*', ']':
+			return 0, errors.ErrInvalidPath("found %c character in field selector context", buf[cursor])
+		case '.':
+			if cursor+1 >= len(buf) {
+				return 0, errors.ErrInvalidPath("JSON Path ends with dot character")
+			}
+			selector := buf[:cursor]
+			b.addSelectorNode(string(selector))
+			offset, err := b.buildSelector(buf[cursor+1:])
+			if err != nil {
+				return 0, err
+			}
+			return cursor + 1 + offset, nil
+		case '[':
+			if cursor+1 >= len(buf) {
+				return 0, errors.ErrInvalidPath("JSON Path ends with left bracket character")
+			}
+			selector := buf[:cursor]
+			b.addSelectorNode(string(selector))
+			offset, err := b.buildIndex(buf[cursor+1:])
+			if err != nil {
+				return 0, err
+			}
+			return cursor + 1 + offset, nil
+		case '"':
+			if cursor+1 >= len(buf) {
+				return 0, errors.ErrInvalidPath("JSON Path ends with double quote character")
+			}
+			offset, err := b.buildQuoteSelector(buf[cursor+1:], DoubleQuotePathSelector) // note: any runes in buf[:cursor] before the quote are not added as a node
+			if err != nil {
+				return 0, err
+			}
+			return cursor + 1 + offset, nil
+		}
+	}
+	b.addSelectorNode(string(buf)) // no delimiter found: the whole remainder is the field name
+	return len(buf), nil
+}
+
+func (b *PathBuilder) buildQuoteSelector(buf []rune, sel QuotePathSelector) (int, error) { // parses a quoted field name; buf starts just after the opening quote and sel says which quote opened it
+	switch buf[0] {
+	case '[', ']', '$', '.', '*', '\'', '"':
+		return 0, errors.ErrInvalidPath("found invalid path character %c after quote", buf[0])
+	}
+	for cursor := 0; cursor < len(buf); cursor++ {
+		switch buf[cursor] {
+		case '\'':
+			if sel != SingleQuotePathSelector {
+				return 0, errors.ErrInvalidPath("found single quote character in field selector with double quote context") // a ' was found while a " selector is open (messages were previously swapped)
+			}
+			if len(buf) <= cursor+1 {
+				return 0, errors.ErrInvalidPath("JSON Path ends with single quote character in field selector context")
+			}
+			if buf[cursor+1] != ']' {
+				return 0, errors.ErrInvalidPath("expect right bracket for field selector with single quote but found %c", buf[cursor+1])
+			}
+			selector := buf[:cursor]
+			b.addSelectorNode(string(selector))
+			b.singleQuotePathSelector = true
+			return b.buildNextCharIfExists(buf, cursor+2) // skip the closing quote and the ']'
+		case '"':
+			if sel != DoubleQuotePathSelector {
+				return 0, errors.ErrInvalidPath("found double quote character in field selector with single quote context") // a " was found while a ' selector is open (messages were previously swapped)
+			}
+			selector := buf[:cursor]
+			b.addSelectorNode(string(selector))
+			b.doubleQuotePathSelector = true
+			return b.buildNextCharIfExists(buf, cursor+1) // skip the closing quote
+		}
+	}
+	return 0, errors.ErrInvalidPath("couldn't find quote character in selector quote path context")
+}
+
+func (b *PathBuilder) buildPathRecursive(buf []rune) (int, error) { // parses the field name that follows "..", adding a recursive node, then whatever step comes after it
+	switch buf[0] {
+	case '.', '[', ']', '$', '*':
+		return 0, errors.ErrInvalidPath("found invalid path character %c after double dot", buf[0])
+	}
+	for cursor := 0; cursor < len(buf); cursor++ {
+		switch buf[cursor] {
+		case '$', '*', ']':
+			return 0, errors.ErrInvalidPath("found %c character in field selector context", buf[cursor])
+		case '.':
+			if cursor+1 >= len(buf) {
+				return 0, errors.ErrInvalidPath("JSON Path ends with dot character")
+			}
+			selector := buf[:cursor]
+			b.addRecursiveNode(string(selector))
+			offset, err := b.buildSelector(buf[cursor+1:])
+			if err != nil {
+				return 0, err
+			}
+			return cursor + 1 + offset, nil
+		case '[':
+			if cursor+1 >= len(buf) {
+				return 0, errors.ErrInvalidPath("JSON Path ends with left bracket character")
+			}
+			selector := buf[:cursor]
+			b.addRecursiveNode(string(selector))
+			offset, err := b.buildIndex(buf[cursor+1:])
+			if err != nil {
+				return 0, err
+			}
+			return cursor + 1 + offset, nil
+		}
+	}
+	b.addRecursiveNode(string(buf)) // no delimiter found: the whole remainder is the field name
+	return len(buf), nil
+}
+
+func (b *PathBuilder) buildIndex(buf []rune) (int, error) { // parses what follows a '[': a single-quoted name, '*', or a decimal index; buf starts just after the '['
+	switch buf[0] {
+	case '.', '[', ']', '$':
+		return 0, errors.ErrInvalidPath("found invalid path character %c after left bracket", buf[0])
+	case '\'':
+		if len(buf) == 1 {
+			return 0, errors.ErrInvalidPath("JSON Path ends with single quote character")
+		}
+		offset, err := b.buildQuoteSelector(buf[1:], SingleQuotePathSelector)
+		if err != nil {
+			return 0, err
+		}
+		return 1 + offset, nil // +1 for the opening quote
+	case '*':
+		if len(buf) == 1 {
+			return 0, errors.ErrInvalidPath("JSON Path ends with star character")
+		}
+		if buf[1] != ']' {
+			return 0, errors.ErrInvalidPath("expect right bracket character for index all path but found %c character", buf[1])
+		}
+		b.addIndexAllNode()
+		offset := len("*]")
+		if len(buf) > 2 {
+			buildOffset, err := b.buildNext(buf[2:])
+			if err != nil {
+				return 0, err
+			}
+			return offset + buildOffset, nil
+		}
+		return offset, nil
+	}
+
+	for cursor := 0; cursor < len(buf); cursor++ {
+		switch buf[cursor] {
+		case ']':
+			index, err := strconv.ParseInt(string(buf[:cursor]), 10, 64)
+			if err != nil {
+				return 0, errors.ErrInvalidPath("%q is unexpected index path", string(buf[:cursor])) // convert to string: %q on a []rune prints a bracketed list of quoted runes
+			}
+			b.addIndexNode(int(index))
+			return b.buildNextCharIfExists(buf, cursor+1) // skip the ']'
+		}
+	}
+	return 0, errors.ErrInvalidPath("couldn't find right bracket character in index path context")
+}
+
+func (b *PathBuilder) addIndexAllNode() { // appends a [*] node to the chain being built
+	node := newPathIndexAllNode()
+	if b.root == nil { // first node becomes both root and current tail
+		b.root = node
+		b.node = node
+	} else {
+		b.node = b.node.chain(node) // chain returns its argument, so node becomes the new tail
+	}
+}
+
+func (b *PathBuilder) addRecursiveNode(selector string) { // appends a ..selector node to the chain being built
+	node := newPathRecursiveNode(selector)
+	if b.root == nil { // first node becomes both root and current tail
+		b.root = node
+		b.node = node
+	} else {
+		b.node = b.node.chain(node) // chain returns its argument, so node becomes the new tail
+	}
+}
+
+func (b *PathBuilder) addSelectorNode(name string) { // appends a .name node to the chain being built
+	node := newPathSelectorNode(name)
+	if b.root == nil { // first node becomes both root and current tail
+		b.root = node
+		b.node = node
+	} else {
+		b.node = b.node.chain(node) // chain returns its argument, so node becomes the new tail
+	}
+}
+
+func (b *PathBuilder) addIndexNode(idx int) { // appends an [idx] node to the chain being built
+	node := newPathIndexNode(idx)
+	if b.root == nil { // first node becomes both root and current tail
+		b.root = node
+		b.node = node
+	} else {
+		b.node = b.node.chain(node) // chain returns its argument, so node becomes the new tail
+	}
+}
+
+type QuotePathSelector int // which quote character opened a quoted field selector
+
+const (
+	SingleQuotePathSelector QuotePathSelector = 1 // opened with '
+	DoubleQuotePathSelector QuotePathSelector = 2 // opened with "
+)
+
+type Path struct { // parsed JSON Path: a chain of PathNodes plus flags recorded while parsing
+	node                    PathNode // current node; nil when the path is just "$"; temporarily reassigned by mapDecoder.DecodePath while descending
+	RootSelectorOnly        bool     // true when node is nil
+	SingleQuotePathSelector bool
+	DoubleQuotePathSelector bool
+}
+
+func (p *Path) Field(sel string) (PathNode, bool, error) { // delegates to the current node's Field; reports not-found when there is no node
+	if p.node == nil {
+		return nil, false, nil
+	}
+	return p.node.Field(sel)
+}
+
+func (p *Path) Get(src, dst reflect.Value) error { // delegates to the current node's Get; a nil node is a no-op that leaves dst untouched
+	if p.node == nil {
+		return nil
+	}
+	return p.node.Get(src, dst)
+}
+
+func (p *Path) String() string { // renders the node chain; note the leading "$" is only emitted when there is no node
+	if p.node == nil {
+		return "$"
+	}
+	return p.node.String()
+}
+
+type PathNode interface { // one step of a JSON Path
+	fmt.Stringer
+	Index(idx int) (PathNode, bool, error)          // child node, whether array index idx matches this step, and an error if the step is not index-based
+	Field(fieldName string) (PathNode, bool, error) // child node, whether fieldName matches this step, and an error if the step is not field-based
+	Get(src, dst reflect.Value) error               // resolves this step (and its children) against src and stores the result in dst
+	chain(PathNode) PathNode                        // attaches the next step and returns it
+	target() bool                                   // BasePathNode: true when the node has no child
+	single() bool                                   // BasePathNode: always true
+}
+
+type BasePathNode struct { // shared child link embedded by every concrete path node
+	child PathNode // next step in the path; nil for the last step
+}
+
+func (n *BasePathNode) chain(node PathNode) PathNode { // sets node as the child (replacing any existing child) and returns it
+	n.child = node
+	return node
+}
+
+func (n *BasePathNode) target() bool { // reports whether this is the last step
+	return n.child == nil
+}
+
+func (n *BasePathNode) single() bool { // always true in this base implementation
+	return true
+}
+
+type PathSelectorNode struct { // path step that matches an object field by exact name
+	*BasePathNode
+	selector string // field name to match
+}
+
+func newPathSelectorNode(selector string) *PathSelectorNode { // builds a selector node with no child
+	return &PathSelectorNode{
+		BasePathNode: &BasePathNode{},
+		selector:     selector,
+	}
+}
+
+func (n *PathSelectorNode) Index(idx int) (PathNode, bool, error) { // a field selector cannot be applied to an array index; always returns a PathError
+	return nil, false, &errors.PathError{}
+}
+
+func (n *PathSelectorNode) Field(fieldName string) (PathNode, bool, error) { // matches when fieldName equals the selector; the returned node is the child (nil for the last step)
+	if n.selector == fieldName {
+		return n.child, true, nil
+	}
+	return nil, false, nil
+}
+
+func (n *PathSelectorNode) Get(src, dst reflect.Value) error { // finds the entry of src named by the selector and assigns it (or the child's result) to dst
+	switch src.Type().Kind() {
+	case reflect.Map:
+		iter := src.MapRange()
+		for iter.Next() {
+			key, ok := iter.Key().Interface().(string)
+			if !ok {
+				return fmt.Errorf("invalid map key type %s", src.Type().Key()) // %s prints the key type; %T printed the reflect implementation type instead
+			}
+			child, found, err := n.Field(key)
+			if err != nil {
+				return err
+			}
+			if found {
+				if child != nil {
+					return child.Get(iter.Value(), dst)
+				}
+				return AssignValue(iter.Value(), dst)
+			}
+		}
+	case reflect.Struct:
+		typ := src.Type()
+		for i := 0; i < typ.NumField(); i++ { // NumField, not Len: reflect.Type.Len panics for any kind other than Array
+			tag := runtime.StructTagFromField(typ.Field(i))
+			child, found, err := n.Field(tag.Key)
+			if err != nil {
+				return err
+			}
+			if found {
+				if child != nil {
+					return child.Get(src.Field(i), dst)
+				}
+				return AssignValue(src.Field(i), dst)
+			}
+		}
+	case reflect.Ptr:
+		return n.Get(src.Elem(), dst)
+	case reflect.Interface:
+		return n.Get(reflect.ValueOf(src.Interface()), dst) // unwrap to the dynamic value
+	case reflect.Float64, reflect.String, reflect.Bool:
+		return AssignValue(src, dst)
+	}
+	return fmt.Errorf("failed to get %s value from %s", n.selector, src.Type()) // reached when no map key / struct field matched or the kind is unsupported
+}
+
+func (n *PathSelectorNode) String() string { // renders ".selector" followed by the rest of the chain
+	s := fmt.Sprintf(".%s", n.selector)
+	if n.child != nil {
+		s += n.child.String()
+	}
+	return s
+}
+
+type PathIndexNode struct { // path step that matches one array index
+	*BasePathNode
+	selector int // index to match
+}
+
+func newPathIndexNode(selector int) *PathIndexNode { // builds an index node with no child
+	return &PathIndexNode{
+		BasePathNode: &BasePathNode{},
+		selector:     selector,
+	}
+}
+
+func (n *PathIndexNode) Index(idx int) (PathNode, bool, error) { // matches when idx equals the selector; the returned node is the child (nil for the last step)
+	if n.selector == idx {
+		return n.child, true, nil
+	}
+	return nil, false, nil
+}
+
+func (n *PathIndexNode) Field(fieldName string) (PathNode, bool, error) { // an index step cannot be applied to an object field; always returns a PathError
+	return nil, false, &errors.PathError{}
+}
+
+func (n *PathIndexNode) Get(src, dst reflect.Value) error { // assigns element [selector] of src (or the child's result on it) to dst
+	switch src.Type().Kind() {
+	case reflect.Array, reflect.Slice:
+		if n.selector >= 0 && src.Len() > n.selector { // lower bound added: a negative index would make src.Index panic instead of returning the error below
+			if n.child != nil {
+				return n.child.Get(src.Index(n.selector), dst)
+			}
+			return AssignValue(src.Index(n.selector), dst)
+		}
+	case reflect.Ptr:
+		return n.Get(src.Elem(), dst)
+	case reflect.Interface:
+		return n.Get(reflect.ValueOf(src.Interface()), dst) // unwrap to the dynamic value
+	}
+	return fmt.Errorf("failed to get [%d] value from %s", n.selector, src.Type()) // out-of-range index or unsupported kind
+}
+
+func (n *PathIndexNode) String() string { // renders "[selector]" followed by the rest of the chain
+	s := fmt.Sprintf("[%d]", n.selector)
+	if n.child != nil {
+		s += n.child.String()
+	}
+	return s
+}
+
+type PathIndexAllNode struct { // path step [*] that matches every array index
+	*BasePathNode
+}
+
+func newPathIndexAllNode() *PathIndexAllNode { // builds a [*] node with no child
+	return &PathIndexAllNode{
+		BasePathNode: &BasePathNode{},
+	}
+}
+
+func (n *PathIndexAllNode) Index(idx int) (PathNode, bool, error) { // matches any idx; the returned node is the child (nil for the last step)
+	return n.child, true, nil
+}
+
+func (n *PathIndexAllNode) Field(fieldName string) (PathNode, bool, error) { // [*] cannot be applied to an object field; always returns a PathError
+	return nil, false, &errors.PathError{}
+}
+
+func (n *PathIndexAllNode) Get(src, dst reflect.Value) error { // collects every element of src (or the child's result on each) into a []interface{} and assigns it to dst
+	switch src.Type().Kind() {
+	case reflect.Array, reflect.Slice:
+		var arr []interface{}
+		for i := 0; i < src.Len(); i++ {
+			var v interface{}
+			rv := reflect.ValueOf(&v) // pointer to v is handed on as the destination for this element
+			if n.child != nil {
+				if err := n.child.Get(src.Index(i), rv); err != nil {
+					return err
+				}
+			} else {
+				if err := AssignValue(src.Index(i), rv); err != nil {
+					return err
+				}
+			}
+			arr = append(arr, v)
+		}
+		if err := AssignValue(reflect.ValueOf(arr), dst); err != nil {
+			return err
+		}
+		return nil
+	case reflect.Ptr:
+		return n.Get(src.Elem(), dst)
+	case reflect.Interface:
+		return n.Get(reflect.ValueOf(src.Interface()), dst) // unwrap to the dynamic value
+	}
+	return fmt.Errorf("failed to get all value from %s", src.Type())
+}
+
+func (n *PathIndexAllNode) String() string { // renders "[*]" followed by the rest of the chain
+	s := "[*]"
+	if n.child != nil {
+		s += n.child.String()
+	}
+	return s
+}
+
+type PathRecursiveNode struct { // path step "..name" that searches for a field at any depth
+	*BasePathNode
+	selector string // field name to search for
+}
+
+func newPathRecursiveNode(selector string) *PathRecursiveNode { // builds a recursive node whose initial child is a plain selector node for the same name
+	node := newPathSelectorNode(selector)
+	return &PathRecursiveNode{
+		BasePathNode: &BasePathNode{
+			child: node, // note: a later chain() call on this node replaces this child
+		},
+		selector: selector,
+	}
+}
+
+func (n *PathRecursiveNode) Field(fieldName string) (PathNode, bool, error) { // matches when fieldName equals the selector and returns the child
+	if n.selector == fieldName {
+		return n.child, true, nil
+	}
+	return nil, false, nil
+}
+
+func (n *PathRecursiveNode) Index(_ int) (PathNode, bool, error) { // every index matches and the node returns itself, so the search continues inside array elements
+	return n, true, nil
+}
+
+func valueToSliceValue(v interface{}) []interface{} { // flattens a slice/array into its elements; any other value (including nil) becomes a one-element slice
+	rv := reflect.ValueOf(v)
+	ret := []interface{}{}
+	if rv.Kind() == reflect.Slice || rv.Kind() == reflect.Array { // Value.Kind is Invalid for a nil v, whereas rv.Type() would panic on the zero Value
+		for i := 0; i < rv.Len(); i++ {
+			ret = append(ret, rv.Index(i).Interface())
+		}
+		return ret
+	}
+	return []interface{}{v}
+}
+
+func (n *PathRecursiveNode) Get(src, dst reflect.Value) error { // searches src at every depth for the selector and assigns the flattened list of matches to dst
+	if n.child == nil {
+		return fmt.Errorf("failed to get by recursive path ..%s", n.selector)
+	}
+	var arr []interface{}
+	switch src.Type().Kind() {
+	case reflect.Map:
+		iter := src.MapRange()
+		for iter.Next() {
+			key, ok := iter.Key().Interface().(string)
+			if !ok {
+				return fmt.Errorf("invalid map key type %s", src.Type().Key()) // %s prints the key type; %T printed the reflect implementation type instead
+			}
+			child, found, err := n.Field(key)
+			if err != nil {
+				return err
+			}
+			if found {
+				var v interface{}
+				rv := reflect.ValueOf(&v)
+				_ = child.Get(iter.Value(), rv) // best effort: a failed lookup leaves v nil
+				arr = append(arr, valueToSliceValue(v)...)
+			} else {
+				var v interface{}
+				rv := reflect.ValueOf(&v)
+				_ = n.Get(iter.Value(), rv) // best effort: keep searching deeper; failures just yield no matches
+				if v != nil {
+					arr = append(arr, valueToSliceValue(v)...)
+				}
+			}
+		}
+		_ = AssignValue(reflect.ValueOf(arr), dst)
+		return nil
+	case reflect.Struct:
+		typ := src.Type()
+		for i := 0; i < typ.NumField(); i++ { // NumField, not Len: reflect.Type.Len panics for any kind other than Array
+			tag := runtime.StructTagFromField(typ.Field(i))
+			child, found, err := n.Field(tag.Key)
+			if err != nil {
+				return err
+			}
+			if found {
+				var v interface{}
+				rv := reflect.ValueOf(&v)
+				_ = child.Get(src.Field(i), rv) // best effort: a failed lookup leaves v nil
+				arr = append(arr, valueToSliceValue(v)...)
+			} else {
+				var v interface{}
+				rv := reflect.ValueOf(&v)
+				_ = n.Get(src.Field(i), rv) // best effort: keep searching deeper; failures just yield no matches
+				if v != nil {
+					arr = append(arr, valueToSliceValue(v)...)
+				}
+			}
+		}
+		_ = AssignValue(reflect.ValueOf(arr), dst)
+		return nil
+	case reflect.Array, reflect.Slice:
+		for i := 0; i < src.Len(); i++ {
+			var v interface{}
+			rv := reflect.ValueOf(&v)
+			_ = n.Get(src.Index(i), rv) // best effort: search each element; failures just yield no matches
+			if v != nil {
+				arr = append(arr, valueToSliceValue(v)...)
+			}
+		}
+		_ = AssignValue(reflect.ValueOf(arr), dst)
+		return nil
+	case reflect.Ptr:
+		return n.Get(src.Elem(), dst)
+	case reflect.Interface:
+		return n.Get(reflect.ValueOf(src.Interface()), dst) // unwrap to the dynamic value
+	}
+	return fmt.Errorf("failed to get %s value from %s", n.selector, src.Type()) // scalar kinds end the search on this branch
+}
+
+func (n *PathRecursiveNode) String() string { // renders "..selector" followed by the child's rendering
+	s := fmt.Sprintf("..%s", n.selector)
+	if n.child != nil {
+		s += n.child.String()
+	}
+	return s
+}
diff --git a/vendor/github.com/goccy/go-json/internal/decoder/ptr.go b/vendor/github.com/goccy/go-json/internal/decoder/ptr.go
new file mode 100644
index 00000000..ae229946
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/internal/decoder/ptr.go
@@ -0,0 +1,97 @@
+package decoder
+
+import (
+	"fmt"
+	"unsafe"
+
+	"github.com/goccy/go-json/internal/runtime"
+)
+
+type ptrDecoder struct { // decodes into a pointer, allocating the pointee when needed
+	dec        Decoder       // decoder for the pointee
+	typ        *runtime.Type // type passed to unsafe_New when a pointee must be allocated
+	structName string
+	fieldName  string
+}
+
+func newPtrDecoder(dec Decoder, typ *runtime.Type, structName, fieldName string) *ptrDecoder { // wraps dec so it can decode through a pointer to typ
+	return &ptrDecoder{
+		dec:        dec,
+		typ:        typ,
+		structName: structName,
+		fieldName:  fieldName,
+	}
+}
+
+func (d *ptrDecoder) contentDecoder() Decoder { // unwraps nested ptrDecoders and returns the first non-pointer decoder
+	dec, ok := d.dec.(*ptrDecoder)
+	if !ok {
+		return d.dec
+	}
+	return dec.contentDecoder()
+}
+
+//nolint:golint
+//go:linkname unsafe_New reflect.unsafe_New
+func unsafe_New(*runtime.Type) unsafe.Pointer // body-less declaration linked to reflect.unsafe_New
+
+func UnsafeNew(t *runtime.Type) unsafe.Pointer { // exported wrapper around unsafe_New
+	return unsafe_New(t)
+}
+
+func (d *ptrDecoder) DecodeStream(s *Stream, depth int64, p unsafe.Pointer) error { // decodes from the stream into the pointer stored at p; null sets it to nil
+	if s.skipWhiteSpace() == nul {
+		s.read()
+	}
+	if s.char() == 'n' {
+		if err := nullBytes(s); err != nil {
+			return err
+		}
+		*(*unsafe.Pointer)(p) = nil
+		return nil
+	}
+	var newptr unsafe.Pointer
+	if *(*unsafe.Pointer)(p) == nil {
+		newptr = unsafe_New(d.typ) // allocate a pointee only when the destination pointer is nil
+		*(*unsafe.Pointer)(p) = newptr
+	} else {
+		newptr = *(*unsafe.Pointer)(p) // otherwise decode into the existing pointee
+	}
+	if err := d.dec.DecodeStream(s, depth, newptr); err != nil {
+		return err // unlike Decode, the destination pointer is not reset on failure here
+	}
+	return nil
+}
+
+func (d *ptrDecoder) Decode(ctx *RuntimeContext, cursor, depth int64, p unsafe.Pointer) (int64, error) { // decodes from ctx.Buf into the pointer stored at p; null sets it to nil
+	buf := ctx.Buf
+	cursor = skipWhiteSpace(buf, cursor)
+	if buf[cursor] == 'n' {
+		if err := validateNull(buf, cursor); err != nil {
+			return 0, err
+		}
+		if p != nil { // only the null path tolerates a nil p; the code below dereferences p unconditionally
+			*(*unsafe.Pointer)(p) = nil
+		}
+		cursor += 4 // len("null")
+		return cursor, nil
+	}
+	var newptr unsafe.Pointer
+	if *(*unsafe.Pointer)(p) == nil {
+		newptr = unsafe_New(d.typ) // allocate a pointee only when the destination pointer is nil
+		*(*unsafe.Pointer)(p) = newptr
+	} else {
+		newptr = *(*unsafe.Pointer)(p) // otherwise decode into the existing pointee
+	}
+	c, err := d.dec.Decode(ctx, cursor, depth, newptr)
+	if err != nil {
+		*(*unsafe.Pointer)(p) = nil // on failure the destination pointer is cleared, even if it was non-nil on entry
+		return 0, err
+	}
+	cursor = c
+	return cursor, nil
+}
+
+func (d *ptrDecoder) DecodePath(ctx *RuntimeContext, cursor, depth int64) ([][]byte, int64, error) { // path decoding is not supported for pointers; always returns an error
+	return nil, 0, fmt.Errorf("json: ptr decoder does not support decode path")
+}
diff --git a/vendor/github.com/goccy/go-json/internal/decoder/slice.go b/vendor/github.com/goccy/go-json/internal/decoder/slice.go
new file mode 100644
index 00000000..30a23e4b
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/internal/decoder/slice.go
@@ -0,0 +1,380 @@
+package decoder
+
+import (
+	"reflect"
+	"sync"
+	"unsafe"
+
+	"github.com/goccy/go-json/internal/errors"
+	"github.com/goccy/go-json/internal/runtime"
+)
+
+var (
+	sliceType = runtime.Type2RType( // runtime type of sliceHeader, passed to typedmemmove when a whole header is copied
+		reflect.TypeOf((*sliceHeader)(nil)).Elem(),
+	)
+	nilSlice = unsafe.Pointer(&sliceHeader{}) // zero-valued header written to the destination when the JSON value is null
+)
+
+type sliceDecoder struct {
+	elemType          *runtime.Type // runtime type of one element
+	isElemPointerType bool          // true when the element kind is Ptr or Map
+	valueDecoder      Decoder       // decodes each element
+	size              uintptr       // byte stride between consecutive elements
+	arrayPool         sync.Pool     // scratch *sliceHeader values reused between decodes
+	structName        string        // copied into UnmarshalTypeError.Struct
+	fieldName         string        // copied into UnmarshalTypeError.Field
+}
+
+// reflect.SliceHeader declares its data field as uintptr.
+// With that type, the reference created by newArray() cannot be traced.
+// This header therefore declares data as unsafe.Pointer instead.
+type sliceHeader struct {
+	data unsafe.Pointer // start of the backing array
+	len  int            // number of elements in use
+	cap  int            // number of elements the backing array holds
+}
+
+const (
+	defaultSliceCapacity = 2 // element capacity of a scratch array newly created by the pool
+)
+
+func newSliceDecoder(dec Decoder, elemType *runtime.Type, size uintptr, structName, fieldName string) *sliceDecoder {
+	kind := elemType.Kind()
+	d := &sliceDecoder{
+		valueDecoder:      dec,
+		elemType:          elemType,
+		isElemPointerType: kind == reflect.Ptr || kind == reflect.Map,
+		size:              size,
+		structName:        structName,
+		fieldName:         fieldName,
+	}
+	// The pool hands out empty scratch headers with a small starting capacity.
+	d.arrayPool.New = func() interface{} {
+		return &sliceHeader{
+			data: newArray(elemType, defaultSliceCapacity),
+			cap:  defaultSliceCapacity,
+		}
+	}
+	return d
+}
+
+func (d *sliceDecoder) newSlice(src *sliceHeader) *sliceHeader {
+	scratch := d.arrayPool.Get().(*sliceHeader)
+	if src.len <= 0 {
+		// nothing to carry over from the destination slice
+		scratch.len = 0
+		return scratch
+	}
+	if scratch.cap < src.cap {
+		// pooled array is too small: allocate one matching the source capacity
+		scratch = &sliceHeader{data: newArray(d.elemType, src.cap), cap: src.cap}
+	}
+	scratch.len = src.len
+	// copy original elem
+	copySlice(d.elemType, *scratch, *src)
+	return scratch
+}
+
+func (d *sliceDecoder) releaseSlice(p *sliceHeader) { // hands a scratch header back to the pool for reuse
+	d.arrayPool.Put(p)
+}
+
+//go:linkname copySlice reflect.typedslicecopy
+func copySlice(elemType *runtime.Type, dst, src sliceHeader) int // no body: bound to reflect.typedslicecopy by the directive above
+
+//go:linkname newArray reflect.unsafe_NewArray
+func newArray(*runtime.Type, int) unsafe.Pointer // no body: bound to reflect.unsafe_NewArray by the directive above
+
+//go:linkname typedmemmove reflect.typedmemmove
+func typedmemmove(t *runtime.Type, dst, src unsafe.Pointer) // no body: bound to reflect.typedmemmove by the directive above
+
+func (d *sliceDecoder) errNumber(offset int64) *errors.UnmarshalTypeError { // type error for a JSON number found where a slice value was expected
+	return &errors.UnmarshalTypeError{
+		Value:  "number",
+		Type:   reflect.SliceOf(runtime.RType2Type(d.elemType)),
+		Struct: d.structName,
+		Field:  d.fieldName,
+		Offset: offset,
+	}
+}
+
+func (d *sliceDecoder) DecodeStream(s *Stream, depth int64, p unsafe.Pointer) error { // decodes a JSON array or null from s into the slice header at p
+	depth++
+	if depth > maxDecodeNestingDepth {
+		return errors.ErrExceededMaxDepth(s.char(), s.cursor)
+	}
+
+	for {
+		switch s.char() {
+		case ' ', '\n', '\t', '\r':
+			s.cursor++
+			continue
+		case 'n':
+			if err := nullBytes(s); err != nil {
+				return err
+			}
+			typedmemmove(sliceType, p, nilSlice) // null: overwrite the destination with the zero header
+			return nil
+		case '[':
+			s.cursor++
+			if s.skipWhiteSpace() == ']' {
+				dst := (*sliceHeader)(p) // empty array: truncate existing storage, or give a nil slice a zero-length array
+				if dst.data == nil {
+					dst.data = newArray(d.elemType, 0)
+				} else {
+					dst.len = 0
+				}
+				s.cursor++
+				return nil
+			}
+			idx := 0
+			slice := d.newSlice((*sliceHeader)(p)) // scratch slice seeded with the destination's current elements
+			srcLen := slice.len
+			capacity := slice.cap
+			data := slice.data
+			for {
+				if capacity <= idx {
+					src := sliceHeader{data: data, len: idx, cap: capacity}
+					capacity *= 2 // scratch array is full: double it and copy over the elements decoded so far
+					data = newArray(d.elemType, capacity)
+					dst := sliceHeader{data: data, len: idx, cap: capacity}
+					copySlice(d.elemType, dst, src)
+				}
+				ep := unsafe.Pointer(uintptr(data) + uintptr(idx)*d.size) // address of element idx
+
+				// if srcLen is greater than idx, keep the original reference
+				if srcLen <= idx {
+					if d.isElemPointerType {
+						**(**unsafe.Pointer)(unsafe.Pointer(&ep)) = nil // initialize elem pointer
+					} else {
+						// assign new element to the slice
+						typedmemmove(d.elemType, ep, unsafe_New(d.elemType))
+					}
+				}
+
+				if err := d.valueDecoder.DecodeStream(s, depth, ep); err != nil {
+					return err
+				}
+				s.skipWhiteSpace()
+			RETRY:
+				switch s.char() {
+				case ']':
+					slice.cap = capacity
+					slice.len = idx + 1
+					slice.data = data
+					dst := (*sliceHeader)(p)
+					dst.len = idx + 1
+					if dst.len > dst.cap { // destination too small: give it an exactly sized array
+						dst.data = newArray(d.elemType, dst.len)
+						dst.cap = dst.len
+					}
+					copySlice(d.elemType, *dst, *slice)
+					d.releaseSlice(slice)
+					s.cursor++
+					return nil
+				case ',':
+					idx++
+				case nul:
+					if s.read() { // buffered data ran out: refill and look at the same position again
+						goto RETRY
+					}
+					slice.cap = capacity
+					slice.data = data
+					d.releaseSlice(slice)
+					goto ERROR
+				default:
+					slice.cap = capacity
+					slice.data = data
+					d.releaseSlice(slice)
+					goto ERROR
+				}
+				s.cursor++
+			}
+		case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
+			return d.errNumber(s.totalOffset())
+		case nul:
+			if s.read() {
+				continue
+			}
+			goto ERROR
+		default:
+			goto ERROR
+		}
+	}
+ERROR:
+	return errors.ErrUnexpectedEndOfJSON("slice", s.totalOffset())
+}
+
+func (d *sliceDecoder) Decode(ctx *RuntimeContext, cursor, depth int64, p unsafe.Pointer) (int64, error) { // decodes a JSON array or null from ctx.Buf at cursor into the slice header at p; returns the cursor past the value
+	buf := ctx.Buf
+	depth++
+	if depth > maxDecodeNestingDepth {
+		return 0, errors.ErrExceededMaxDepth(buf[cursor], cursor)
+	}
+
+	for {
+		switch buf[cursor] {
+		case ' ', '\n', '\t', '\r':
+			cursor++
+			continue
+		case 'n':
+			if err := validateNull(buf, cursor); err != nil {
+				return 0, err
+			}
+			cursor += 4
+			typedmemmove(sliceType, p, nilSlice) // null: overwrite the destination with the zero header
+			return cursor, nil
+		case '[':
+			cursor++
+			cursor = skipWhiteSpace(buf, cursor)
+			if buf[cursor] == ']' {
+				dst := (*sliceHeader)(p) // empty array: truncate existing storage, or give a nil slice a zero-length array
+				if dst.data == nil {
+					dst.data = newArray(d.elemType, 0)
+				} else {
+					dst.len = 0
+				}
+				cursor++
+				return cursor, nil
+			}
+			idx := 0
+			slice := d.newSlice((*sliceHeader)(p)) // scratch slice seeded with the destination's current elements
+			srcLen := slice.len
+			capacity := slice.cap
+			data := slice.data
+			for {
+				if capacity <= idx {
+					src := sliceHeader{data: data, len: idx, cap: capacity}
+					capacity *= 2 // scratch array is full: double it and copy over the elements decoded so far
+					data = newArray(d.elemType, capacity)
+					dst := sliceHeader{data: data, len: idx, cap: capacity}
+					copySlice(d.elemType, dst, src)
+				}
+				ep := unsafe.Pointer(uintptr(data) + uintptr(idx)*d.size) // address of element idx
+				// if srcLen is greater than idx, keep the original reference
+				if srcLen <= idx {
+					if d.isElemPointerType {
+						**(**unsafe.Pointer)(unsafe.Pointer(&ep)) = nil // initialize elem pointer
+					} else {
+						// assign new element to the slice
+						typedmemmove(d.elemType, ep, unsafe_New(d.elemType))
+					}
+				}
+				c, err := d.valueDecoder.Decode(ctx, cursor, depth, ep)
+				if err != nil {
+					return 0, err
+				}
+				cursor = c
+				cursor = skipWhiteSpace(buf, cursor)
+				switch buf[cursor] {
+				case ']':
+					slice.cap = capacity
+					slice.len = idx + 1
+					slice.data = data
+					dst := (*sliceHeader)(p)
+					dst.len = idx + 1
+					if dst.len > dst.cap { // destination too small: give it an exactly sized array
+						dst.data = newArray(d.elemType, dst.len)
+						dst.cap = dst.len
+					}
+					copySlice(d.elemType, *dst, *slice)
+					d.releaseSlice(slice)
+					cursor++
+					return cursor, nil
+				case ',':
+					idx++
+				default:
+					slice.cap = capacity
+					slice.data = data
+					d.releaseSlice(slice)
+					return 0, errors.ErrInvalidCharacter(buf[cursor], "slice", cursor)
+				}
+				cursor++
+			}
+		case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
+			return 0, d.errNumber(cursor)
+		default:
+			return 0, errors.ErrUnexpectedEndOfJSON("slice", cursor)
+		}
+	}
+}
+
+func (d *sliceDecoder) DecodePath(ctx *RuntimeContext, cursor, depth int64) ([][]byte, int64, error) { // collects the raw bytes of the array elements selected by ctx.Option.Path
+	buf := ctx.Buf
+	depth++
+	if depth > maxDecodeNestingDepth {
+		return nil, 0, errors.ErrExceededMaxDepth(buf[cursor], cursor)
+	}
+
+	ret := [][]byte{}
+	for {
+		switch buf[cursor] {
+		case ' ', '\n', '\t', '\r':
+			cursor++
+			continue
+		case 'n':
+			if err := validateNull(buf, cursor); err != nil {
+				return nil, 0, err
+			}
+			cursor += 4
+			return [][]byte{nullbytes}, cursor, nil
+		case '[':
+			cursor++
+			cursor = skipWhiteSpace(buf, cursor)
+			if buf[cursor] == ']' {
+				cursor++
+				return ret, cursor, nil
+			}
+			idx := 0
+			for {
+				child, found, err := ctx.Option.Path.node.Index(idx) // ask the current path node about element idx
+				if err != nil {
+					return nil, 0, err
+				}
+				if found {
+					if child != nil {
+						oldPath := ctx.Option.Path.node // descend: the element decoder runs with the child node, then the node is restored
+						ctx.Option.Path.node = child
+						paths, c, err := d.valueDecoder.DecodePath(ctx, cursor, depth)
+						if err != nil {
+							return nil, 0, err
+						}
+						ctx.Option.Path.node = oldPath
+						ret = append(ret, paths...)
+						cursor = c
+					} else {
+						start := cursor // no child node: the element's raw bytes are the result
+						end, err := skipValue(buf, cursor, depth)
+						if err != nil {
+							return nil, 0, err
+						}
+						ret = append(ret, buf[start:end])
+						cursor = end
+					}
+				} else {
+					c, err := skipValue(buf, cursor, depth) // element not selected: skip over it
+					if err != nil {
+						return nil, 0, err
+					}
+					cursor = c
+				}
+				cursor = skipWhiteSpace(buf, cursor)
+				switch buf[cursor] {
+				case ']':
+					cursor++
+					return ret, cursor, nil
+				case ',':
+					idx++
+				default:
+					return nil, 0, errors.ErrInvalidCharacter(buf[cursor], "slice", cursor)
+				}
+				cursor++
+			}
+		case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
+			return nil, 0, d.errNumber(cursor)
+		default:
+			return nil, 0, errors.ErrUnexpectedEndOfJSON("slice", cursor)
+		}
+	}
+}
diff --git a/vendor/github.com/goccy/go-json/internal/decoder/stream.go b/vendor/github.com/goccy/go-json/internal/decoder/stream.go
new file mode 100644
index 00000000..a383f725
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/internal/decoder/stream.go
@@ -0,0 +1,556 @@
+package decoder
+
+import (
+	"bytes"
+	"encoding/json"
+	"io"
+	"strconv"
+	"unsafe"
+
+	"github.com/goccy/go-json/internal/errors"
+)
+
+const (
+	initBufSize = 512 // byte size of the buffer a new Stream starts with
+)
+
+type Stream struct {
+	buf                   []byte    // working buffer; read() stores a nul byte in its last slot
+	bufSize               int64     // size used when readBuf grows buf (doubled each time)
+	length                int64     // count of valid bytes in buf, advanced by each read
+	r                     io.Reader // source of the JSON text
+	offset                int64     // bytes dropped from the front of buf by reset()
+	cursor                int64     // index of the current byte in buf
+	filledBuffer          bool      // set when the last read used all the space offered
+	allRead               bool      // set once r has returned io.EOF
+	UseNumber             bool      // when set, Token returns json.Number instead of float64
+	DisallowUnknownFields bool
+	Option                *Option
+}
+
+func NewStream(r io.Reader) *Stream {
+	// The buffer starts at initBufSize bytes; Option starts out as an
+	// empty, non-nil value.
+	s := &Stream{r: r, bufSize: initBufSize}
+	s.buf = make([]byte, initBufSize)
+	s.Option = &Option{}
+	return s
+}
+
+func (s *Stream) TotalOffset() int64 { // exported form of totalOffset
+	return s.totalOffset()
+}
+
+func (s *Stream) Buffered() io.Reader {
+	// Unread data runs from the cursor up to the first nul byte, or to the
+	// end of the buffer when no nul byte follows the cursor.
+	rest := s.buf[s.cursor:]
+	if end := bytes.IndexByte(rest, nul); end >= 0 {
+		rest = rest[:end]
+	}
+	return bytes.NewReader(rest)
+}
+
+func (s *Stream) PrepareForDecode() error {
+	// Skips leading white space and at most one ',' or ':' separator.
+	// Returns io.EOF when the input ends before anything else is found.
+	for {
+		switch s.char() {
+		case ' ', '\t', '\r', '\n':
+			s.cursor++
+		case ',', ':':
+			s.cursor++
+			return nil
+		case nul:
+			if !s.read() {
+				return io.EOF
+			}
+		default:
+			return nil
+		}
+	}
+}
+
+func (s *Stream) totalOffset() int64 { // position in the whole input: bytes already dropped plus the cursor
+	return s.offset + s.cursor
+}
+
+func (s *Stream) char() byte { // byte under the cursor
+	return s.buf[s.cursor]
+}
+
+func (s *Stream) equalChar(c byte) bool {
+	// A nul byte under the cursor means the buffered data ran out: try one
+	// refill, then compare whatever is under the cursor afterwards.
+	if s.buf[s.cursor] == nul {
+		s.read()
+	}
+	return s.buf[s.cursor] == c
+}
+
+func (s *Stream) stat() ([]byte, int64, unsafe.Pointer) { // snapshot: buffer, cursor, buffer data pointer
+	return s.buf, s.cursor, s.bufptr()
+}
+
+func (s *Stream) bufptr() unsafe.Pointer { // data pointer of s.buf, read through its slice header
+	return (*sliceHeader)(unsafe.Pointer(&s.buf)).data
+}
+
+func (s *Stream) statForRetry() ([]byte, int64, unsafe.Pointer) {
+	s.cursor-- // step back one byte: the caller's per-iteration cursor++ then lands on the same position again
+	return s.stat()
+}
+
+func (s *Stream) Reset() { // drops the consumed prefix of the buffer, then records the buffer's new size
+	s.reset()
+	s.bufSize = int64(len(s.buf))
+}
+
+func (s *Stream) More() bool {
+	// Skips white space, then reports false at a closing '}' or ']' or at
+	// the end of the input, and true at any other byte.
+	for {
+		switch s.char() {
+		case ' ', '\n', '\r', '\t':
+			s.cursor++
+		case '}', ']':
+			return false
+		case nul:
+			if !s.read() {
+				return false
+			}
+		default:
+			return true
+		}
+	}
+}
+
+func (s *Stream) Token() (interface{}, error) { // returns the next token, or io.EOF when the input is exhausted
+	for {
+		c := s.char()
+		switch c {
+		case ' ', '\n', '\r', '\t':
+			s.cursor++
+		case '{', '[', ']', '}':
+			s.cursor++
+			return json.Delim(c), nil
+		case ',', ':':
+			s.cursor++ // separators are skipped, not returned
+		case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
+			bytes := floatBytes(s)
+			str := *(*string)(unsafe.Pointer(&bytes)) // view the number bytes as a string without copying
+			if s.UseNumber {
+				return json.Number(str), nil
+			}
+			f64, err := strconv.ParseFloat(str, 64)
+			if err != nil {
+				return nil, err
+			}
+			return f64, nil
+		case '"':
+			bytes, err := stringBytes(s)
+			if err != nil {
+				return nil, err
+			}
+			return string(bytes), nil
+		case 't':
+			if err := trueBytes(s); err != nil {
+				return nil, err
+			}
+			return true, nil
+		case 'f':
+			if err := falseBytes(s); err != nil {
+				return nil, err
+			}
+			return false, nil
+		case 'n':
+			if err := nullBytes(s); err != nil {
+				return nil, err
+			}
+			return nil, nil
+		case nul:
+			if s.read() { // buffered data ran out: refill and look again
+				continue
+			}
+			goto END
+		default:
+			return nil, errors.ErrInvalidCharacter(s.char(), "token", s.totalOffset())
+		}
+	}
+END:
+	return nil, io.EOF
+}
+
+func (s *Stream) reset() { // drops everything before the cursor and rebases offset, length and cursor
+	s.offset += s.cursor
+	s.buf = s.buf[s.cursor:]
+	s.length -= s.cursor
+	s.cursor = 0
+}
+
+func (s *Stream) readBuf() []byte { // returns the part of s.buf that the next read may fill
+	if s.filledBuffer { // the previous read used all the space offered: double the buffer first
+		s.bufSize *= 2
+		remainBuf := s.buf
+		s.buf = make([]byte, s.bufSize)
+		copy(s.buf, remainBuf)
+	}
+	remainLen := s.length - s.cursor
+	remainNotNulCharNum := int64(0)
+	for i := int64(0); i < remainLen; i++ { // count the unread bytes that precede the first nul
+		if s.buf[s.cursor+i] == nul {
+			break
+		}
+		remainNotNulCharNum++
+	}
+	s.length = s.cursor + remainNotNulCharNum
+	return s.buf[s.cursor+remainNotNulCharNum:] // region after the unread bytes
+}
+
+func (s *Stream) read() bool {
+	if s.allRead {
+		return false
+	}
+	dst := s.readBuf()
+	// The final byte is reserved for the nul terminator, so at most
+	// len(dst)-1 bytes are requested from the reader.
+	last := len(dst) - 1
+	dst[last] = nul
+	n, err := s.r.Read(dst[:last])
+	s.length += int64(n)
+	// Remember whether this read used up all the space offered.
+	s.filledBuffer = n == last
+	switch {
+	case err == io.EOF:
+		s.allRead = true
+	case err != nil:
+		return false
+	}
+	return true
+}
+
+func (s *Stream) skipWhiteSpace() byte { // advances past white space and returns the byte it stops on
+	p := s.bufptr()
+LOOP:
+	c := char(p, s.cursor)
+	switch c {
+	case ' ', '\n', '\t', '\r':
+		s.cursor++
+		goto LOOP
+	case nul:
+		if s.read() {
+			p = s.bufptr() // the read may have replaced the buffer: refresh the data pointer
+			goto LOOP
+		}
+	}
+	return c
+}
+
+func (s *Stream) skipObject(depth int64) error { // skips to just past the '}' that closes the current object
+	braceCount := 1 // starts at one: skipValue consumes the opening '{' before calling
+	_, cursor, p := s.stat()
+	for {
+		switch char(p, cursor) {
+		case '{':
+			braceCount++
+			depth++
+			if depth > maxDecodeNestingDepth {
+				return errors.ErrExceededMaxDepth(s.char(), s.cursor)
+			}
+		case '}':
+			braceCount--
+			depth--
+			if braceCount == 0 {
+				s.cursor = cursor + 1
+				return nil
+			}
+		case '[':
+			depth++
+			if depth > maxDecodeNestingDepth {
+				return errors.ErrExceededMaxDepth(s.char(), s.cursor)
+			}
+		case ']':
+			depth--
+		case '"':
+			for { // scan to the closing quote, stepping over escaped characters
+				cursor++
+				switch char(p, cursor) {
+				case '\\':
+					cursor++
+					if char(p, cursor) == nul {
+						s.cursor = cursor
+						if s.read() {
+							_, cursor, p = s.stat()
+							continue
+						}
+						return errors.ErrUnexpectedEndOfJSON("string of object", cursor)
+					}
+				case '"':
+					goto SWITCH_OUT
+				case nul:
+					s.cursor = cursor
+					if s.read() {
+						_, cursor, p = s.statForRetry()
+						continue
+					}
+					return errors.ErrUnexpectedEndOfJSON("string of object", cursor)
+				}
+			}
+		case nul:
+			s.cursor = cursor // publish the local cursor so the refill starts from it
+			if s.read() {
+				_, cursor, p = s.stat()
+				continue
+			}
+			return errors.ErrUnexpectedEndOfJSON("object of object", cursor)
+		}
+	SWITCH_OUT:
+		cursor++
+	}
+}
+
+func (s *Stream) skipArray(depth int64) error { // skips to just past the ']' that closes the current array
+	bracketCount := 1 // starts at one: skipValue consumes the opening '[' before calling
+	_, cursor, p := s.stat()
+	for {
+		switch char(p, cursor) {
+		case '[':
+			bracketCount++
+			depth++
+			if depth > maxDecodeNestingDepth {
+				return errors.ErrExceededMaxDepth(s.char(), s.cursor)
+			}
+		case ']':
+			bracketCount--
+			depth--
+			if bracketCount == 0 {
+				s.cursor = cursor + 1
+				return nil
+			}
+		case '{':
+			depth++
+			if depth > maxDecodeNestingDepth {
+				return errors.ErrExceededMaxDepth(s.char(), s.cursor)
+			}
+		case '}':
+			depth--
+		case '"':
+			for { // scan to the closing quote, stepping over escaped characters
+				cursor++
+				switch char(p, cursor) {
+				case '\\':
+					cursor++
+					if char(p, cursor) == nul {
+						s.cursor = cursor
+						if s.read() {
+							_, cursor, p = s.stat()
+							continue
+						}
+						return errors.ErrUnexpectedEndOfJSON("string of object", cursor)
+					}
+				case '"':
+					goto SWITCH_OUT
+				case nul:
+					s.cursor = cursor
+					if s.read() {
+						_, cursor, p = s.statForRetry()
+						continue
+					}
+					return errors.ErrUnexpectedEndOfJSON("string of object", cursor)
+				}
+			}
+		case nul:
+			s.cursor = cursor // publish the local cursor so the refill starts from it
+			if s.read() {
+				_, cursor, p = s.stat()
+				continue
+			}
+			return errors.ErrUnexpectedEndOfJSON("array of object", cursor)
+		}
+	SWITCH_OUT:
+		cursor++
+	}
+}
+
+func (s *Stream) skipValue(depth int64) error { // skips one JSON value of any kind, leaving the cursor just past it
+	_, cursor, p := s.stat()
+	for {
+		switch char(p, cursor) {
+		case ' ', '\n', '\t', '\r':
+			cursor++
+			continue
+		case nul:
+			s.cursor = cursor
+			if s.read() {
+				_, cursor, p = s.stat()
+				continue
+			}
+			return errors.ErrUnexpectedEndOfJSON("value of object", s.totalOffset())
+		case '{':
+			s.cursor = cursor + 1 // consume the opening brace; skipObject looks for the matching close
+			return s.skipObject(depth + 1)
+		case '[':
+			s.cursor = cursor + 1 // consume the opening bracket; skipArray looks for the matching close
+			return s.skipArray(depth + 1)
+		case '"':
+			for { // scan to the closing quote, stepping over escaped characters
+				cursor++
+				switch char(p, cursor) {
+				case '\\':
+					cursor++
+					if char(p, cursor) == nul {
+						s.cursor = cursor
+						if s.read() {
+							_, cursor, p = s.stat()
+							continue
+						}
+						return errors.ErrUnexpectedEndOfJSON("value of string", s.totalOffset())
+					}
+				case '"':
+					s.cursor = cursor + 1
+					return nil
+				case nul:
+					s.cursor = cursor
+					if s.read() {
+						_, cursor, p = s.statForRetry()
+						continue
+					}
+					return errors.ErrUnexpectedEndOfJSON("value of string", s.totalOffset())
+				}
+			}
+		case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
+			for { // consume bytes for as long as floatTable accepts them
+				cursor++
+				c := char(p, cursor)
+				if floatTable[c] {
+					continue
+				} else if c == nul {
+					if s.read() {
+						_, cursor, p = s.stat()
+						continue
+					}
+				}
+				s.cursor = cursor
+				return nil
+			}
+		case 't':
+			s.cursor = cursor
+			if err := trueBytes(s); err != nil {
+				return err
+			}
+			return nil
+		case 'f':
+			s.cursor = cursor
+			if err := falseBytes(s); err != nil {
+				return err
+			}
+			return nil
+		case 'n':
+			s.cursor = cursor
+			if err := nullBytes(s); err != nil {
+				return err
+			}
+			return nil
+		}
+		cursor++
+	}
+}
+
+// nullBytes consumes the literal null from s. On entry the cursor is on the
+// leading 'n'; on success it is left just past the final 'l'.
+//
+// A nul byte in place of an expected character means the buffered data has
+// run out, so more input is read and the same position is checked again.
+// Re-checking matters: without it, a literal split across two reads would
+// have the first character after the refill accepted without validation.
+// Any other mismatch is reported as an invalid character for "null".
+func nullBytes(s *Stream) error {
+	// current cursor's character is 'n'
+	for _, want := range [...]byte{'u', 'l', 'l'} {
+		s.cursor++
+		for s.char() != want {
+			// retryReadNull succeeds only after refilling at a nul byte;
+			// loop so the refilled byte is compared against want too.
+			if err := retryReadNull(s); err != nil {
+				return err
+			}
+		}
+	}
+	s.cursor++
+	return nil
+}
+
+func retryReadNull(s *Stream) error { // nil only when the cursor is on a nul byte and a further read succeeds
+	if s.char() == nul && s.read() {
+		return nil
+	}
+	return errors.ErrInvalidCharacter(s.char(), "null", s.totalOffset())
+}
+
+// trueBytes consumes the literal true from s. On entry the cursor is on the
+// leading 't'; on success it is left just past the final 'e'.
+//
+// A nul byte in place of an expected character means the buffered data has
+// run out, so more input is read and the same position is checked again.
+// Re-checking matters: without it, a literal split across two reads would
+// have the first character after the refill accepted without validation.
+// Any other mismatch is reported as an invalid character for "bool(true)".
+func trueBytes(s *Stream) error {
+	// current cursor's character is 't'
+	for _, want := range [...]byte{'r', 'u', 'e'} {
+		s.cursor++
+		for s.char() != want {
+			// retryReadTrue succeeds only after refilling at a nul byte;
+			// loop so the refilled byte is compared against want too.
+			if err := retryReadTrue(s); err != nil {
+				return err
+			}
+		}
+	}
+	s.cursor++
+	return nil
+}
+
+func retryReadTrue(s *Stream) error { // nil only when the cursor is on a nul byte and a further read succeeds
+	if s.char() == nul && s.read() {
+		return nil
+	}
+	return errors.ErrInvalidCharacter(s.char(), "bool(true)", s.totalOffset())
+}
+
+// falseBytes consumes the literal false from s. On entry the cursor is on
+// the leading 'f'; on success it is left just past the final 'e'.
+//
+// A nul byte in place of an expected character means the buffered data has
+// run out, so more input is read and the same position is checked again.
+// Re-checking matters: without it, a literal split across two reads would
+// have the first character after the refill accepted without validation.
+// For example, with "f" buffered and "Xlse" arriving in the next read, the
+// 'X' must be rejected rather than skipped.
+//
+// Any other mismatch, or a refill that fails, is reported as an invalid
+// character for "bool(false)".
+func falseBytes(s *Stream) error {
+	// current cursor's character is 'f'
+	const rest = "alse"
+	for i := 0; i < len(rest); i++ {
+		want := rest[i]
+		s.cursor++
+		for s.char() != want {
+			// retryReadFalse succeeds only after refilling at a nul byte;
+			// loop so the refilled byte is compared against want too.
+			if err := retryReadFalse(s); err != nil {
+				return err
+			}
+		}
+	}
+	s.cursor++
+	return nil
+}
+
+func retryReadFalse(s *Stream) error { // nil only when the cursor is on a nul byte and a further read succeeds
+	if s.char() == nul && s.read() {
+		return nil
+	}
+	return errors.ErrInvalidCharacter(s.char(), "bool(false)", s.totalOffset())
+}
diff --git a/vendor/github.com/goccy/go-json/internal/decoder/string.go b/vendor/github.com/goccy/go-json/internal/decoder/string.go
new file mode 100644
index 00000000..32602c90
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/internal/decoder/string.go
@@ -0,0 +1,452 @@
+package decoder
+
+import (
+	"bytes"
+	"fmt"
+	"reflect"
+	"unicode"
+	"unicode/utf16"
+	"unicode/utf8"
+	"unsafe"
+
+	"github.com/goccy/go-json/internal/errors"
+)
+
+type stringDecoder struct {
+	structName string // copied into UnmarshalTypeError.Struct
+	fieldName  string // copied into UnmarshalTypeError.Field
+}
+
+func newStringDecoder(structName, fieldName string) *stringDecoder { // the two names are only used when building type errors
+	return &stringDecoder{
+		structName: structName,
+		fieldName:  fieldName,
+	}
+}
+
+func (d *stringDecoder) errUnmarshalType(typeName string, offset int64) *errors.UnmarshalTypeError { // type error for a typeName value found where a string was expected
+	return &errors.UnmarshalTypeError{
+		Value:  typeName,
+		Type:   reflect.TypeOf(""),
+		Offset: offset,
+		Struct: d.structName,
+		Field:  d.fieldName,
+	}
+}
+
+func (d *stringDecoder) DecodeStream(s *Stream, depth int64, p unsafe.Pointer) error { // decodes a JSON string from s into the string at p
+	bytes, err := d.decodeStreamByte(s)
+	if err != nil {
+		return err
+	}
+	if bytes == nil { // JSON null: the destination is left untouched
+		return nil
+	}
+	**(**string)(unsafe.Pointer(&p)) = *(*string)(unsafe.Pointer(&bytes)) // store the bytes as a string without copying them
+	s.reset()
+	return nil
+}
+
+func (d *stringDecoder) Decode(ctx *RuntimeContext, cursor, depth int64, p unsafe.Pointer) (int64, error) { // decodes a JSON string from ctx.Buf at cursor into the string at p
+	bytes, c, err := d.decodeByte(ctx.Buf, cursor)
+	if err != nil {
+		return 0, err
+	}
+	if bytes == nil { // JSON null: the destination is left untouched
+		return c, nil
+	}
+	cursor = c
+	**(**string)(unsafe.Pointer(&p)) = *(*string)(unsafe.Pointer(&bytes)) // store the bytes as a string without copying them
+	return cursor, nil
+}
+
+func (d *stringDecoder) DecodePath(ctx *RuntimeContext, cursor, depth int64) ([][]byte, int64, error) { // returns the decoded string bytes (or nullbytes) as a single result
+	bytes, c, err := d.decodeByte(ctx.Buf, cursor)
+	if err != nil {
+		return nil, 0, err
+	}
+	if bytes == nil {
+		return [][]byte{nullbytes}, c, nil
+	}
+	return [][]byte{bytes}, c, nil
+}
+
+var (
+	hexToInt = [256]int{ // value of each ASCII hex digit, indexed by byte; all other bytes map to 0
+		'0': 0,
+		'1': 1,
+		'2': 2,
+		'3': 3,
+		'4': 4,
+		'5': 5,
+		'6': 6,
+		'7': 7,
+		'8': 8,
+		'9': 9,
+		'A': 10,
+		'B': 11,
+		'C': 12,
+		'D': 13,
+		'E': 14,
+		'F': 15,
+		'a': 10,
+		'b': 11,
+		'c': 12,
+		'd': 13,
+		'e': 14,
+		'f': 15,
+	}
+)
+
+func unicodeToRune(code []byte) rune { // folds hex digits, most significant first, into one value
+	var acc rune
+	for _, digit := range code {
+		acc = acc*16 + rune(hexToInt[digit])
+	}
+	return acc
+}
+
+func readAtLeast(s *Stream, n int64, p *unsafe.Pointer) bool { // reads until more than n bytes are buffered beyond the cursor; false if reading fails first
+	for s.cursor+n >= s.length {
+		if !s.read() {
+			return false
+		}
+		*p = s.bufptr() // the read may have replaced the buffer: refresh the caller's data pointer
+	}
+	return true
+}
+
+func decodeUnicodeRune(s *Stream, p unsafe.Pointer) (rune, int64, unsafe.Pointer, error) { // decodes the \u escape whose 'u' is under the cursor; returns the rune and the bytes consumed from the 'u'
+	const defaultOffset = 5    // 'u' plus four hex digits
+	const surrogateOffset = 11 // first escape from its 'u', then a second "\u" and four more hex digits
+
+	if !readAtLeast(s, defaultOffset, &p) {
+		return rune(0), 0, nil, errors.ErrInvalidCharacter(s.char(), "escaped string", s.totalOffset())
+	}
+
+	r := unicodeToRune(s.buf[s.cursor+1 : s.cursor+defaultOffset])
+	if utf16.IsSurrogate(r) {
+		if !readAtLeast(s, surrogateOffset, &p) { // no room for a second escape: lone surrogate
+			return unicode.ReplacementChar, defaultOffset, p, nil
+		}
+		if s.buf[s.cursor+defaultOffset] != '\\' || s.buf[s.cursor+defaultOffset+1] != 'u' {
+			return unicode.ReplacementChar, defaultOffset, p, nil
+		}
+		r2 := unicodeToRune(s.buf[s.cursor+defaultOffset+2 : s.cursor+surrogateOffset])
+		if r := utf16.DecodeRune(r, r2); r != unicode.ReplacementChar {
+			return r, surrogateOffset, p, nil
+		}
+	}
+	return r, defaultOffset, p, nil
+}
+
+func decodeUnicode(s *Stream, p unsafe.Pointer) (unsafe.Pointer, error) { // replaces the \u escape at the cursor with its UTF-8 bytes, in place
+	const backSlashAndULen = 2 // length of \u
+
+	r, offset, pp, err := decodeUnicodeRune(s, p)
+	if err != nil {
+		return nil, err
+	}
+	unicode := []byte(string(r))
+	unicodeLen := int64(len(unicode))
+	s.buf = append(append(s.buf[:s.cursor-1], unicode...), s.buf[s.cursor+offset:]...) // splice: bytes before the backslash + UTF-8 bytes + bytes after the escape
+	unicodeOrgLen := offset - 1
+	s.length = s.length - (backSlashAndULen + (unicodeOrgLen - unicodeLen))
+	s.cursor = s.cursor - backSlashAndULen + unicodeLen
+	return pp, nil
+}
+
+func decodeEscapeString(s *Stream, p unsafe.Pointer) (unsafe.Pointer, error) { // rewrites the escape sequence whose backslash is under the cursor, in place
+	s.cursor++ // move from the backslash to the escape character
+RETRY:
+	switch s.buf[s.cursor] {
+	case '"':
+		s.buf[s.cursor] = '"'
+	case '\\':
+		s.buf[s.cursor] = '\\'
+	case '/':
+		s.buf[s.cursor] = '/'
+	case 'b':
+		s.buf[s.cursor] = '\b'
+	case 'f':
+		s.buf[s.cursor] = '\f'
+	case 'n':
+		s.buf[s.cursor] = '\n'
+	case 'r':
+		s.buf[s.cursor] = '\r'
+	case 't':
+		s.buf[s.cursor] = '\t'
+	case 'u':
+		return decodeUnicode(s, p)
+	case nul:
+		if !s.read() {
+			return nil, errors.ErrInvalidCharacter(s.char(), "escaped string", s.totalOffset())
+		}
+		p = s.bufptr()
+		goto RETRY
+	default:
+		return nil, errors.ErrUnexpectedEndOfJSON("string", s.totalOffset())
+	}
+	s.buf = append(s.buf[:s.cursor-1], s.buf[s.cursor:]...) // drop the backslash by shifting the rest of the buffer left one byte
+	s.length--
+	s.cursor--
+	p = s.bufptr()
+	return p, nil
+}
+
+var (
+	runeErrBytes    = []byte(string(utf8.RuneError)) // UTF-8 encoding of utf8.RuneError
+	runeErrBytesLen = int64(len(runeErrBytes))       // length of that encoding in bytes
+)
+
+func stringBytes(s *Stream) ([]byte, error) { // reads the string whose opening quote is under the cursor; returns its unescaped contents
+	_, cursor, p := s.stat()
+	cursor++ // skip double quote char
+	start := cursor
+	for {
+		switch char(p, cursor) {
+		case '\\':
+			s.cursor = cursor
+			pp, err := decodeEscapeString(s, p)
+			if err != nil {
+				return nil, err
+			}
+			p = pp
+			cursor = s.cursor
+		case '"':
+			literal := s.buf[start:cursor]
+			cursor++
+			s.cursor = cursor
+			return literal, nil
+		case
+			// 0x00 is nul, 0x5c is '\\', 0x22 is '"' .
+			0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, // 0x01-0x0F
+			0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, // 0x10-0x1F
+			0x20, 0x21 /*0x22,*/, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, // 0x20-0x2F
+			0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F, // 0x30-0x3F
+			0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, // 0x40-0x4F
+			0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5A, 0x5B /*0x5C,*/, 0x5D, 0x5E, 0x5F, // 0x50-0x5F
+			0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, // 0x60-0x6F
+			0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F: // 0x70-0x7F
+			// character is ASCII. skip to next char
+		case
+			0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F, // 0x80-0x8F
+			0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F, // 0x90-0x9F
+			0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF, // 0xA0-0xAF
+			0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF, // 0xB0-0xBF
+			0xC0, 0xC1, // 0xC0-0xC1
+			0xF5, 0xF6, 0xF7, 0xF8, 0xF9, 0xFA, 0xFB, 0xFC, 0xFD, 0xFE, 0xFF: // 0xF5-0xFF
+			// character is invalid: replace this one byte with the bytes of RuneError
+			s.buf = append(append(append([]byte{}, s.buf[:cursor]...), runeErrBytes...), s.buf[cursor+1:]...)
+			_, _, p = s.stat()
+			cursor += runeErrBytesLen
+			s.length += runeErrBytesLen
+			continue
+		case nul:
+			s.cursor = cursor
+			if s.read() {
+				_, cursor, p = s.stat()
+				continue
+			}
+			goto ERROR
+		case 0xEF:
+			// RuneError is {0xEF, 0xBF, 0xBD}
+			if s.buf[cursor+1] == 0xBF && s.buf[cursor+2] == 0xBD {
+				// found RuneError: skip
+				cursor += 2
+				break
+			}
+			fallthrough
+		default:
+			// multi bytes character
+			if !utf8.FullRune(s.buf[cursor : len(s.buf)-1]) {
+				s.cursor = cursor
+				if s.read() {
+					_, cursor, p = s.stat()
+					continue
+				}
+				goto ERROR
+			}
+			r, size := utf8.DecodeRune(s.buf[cursor:])
+			if r == utf8.RuneError {
+				s.buf = append(append(append([]byte{}, s.buf[:cursor]...), runeErrBytes...), s.buf[cursor+1:]...)
+				cursor += runeErrBytesLen
+				s.length += runeErrBytesLen
+				_, _, p = s.stat()
+			} else {
+				cursor += int64(size)
+			}
+			continue
+		}
+		cursor++
+	}
+ERROR:
+	return nil, errors.ErrUnexpectedEndOfJSON("string", s.totalOffset())
+}
+
+func (d *stringDecoder) decodeStreamByte(s *Stream) ([]byte, error) { // returns the string contents, or (nil, nil) for JSON null
+	for {
+		switch s.char() {
+		case ' ', '\n', '\t', '\r':
+			s.cursor++
+			continue
+		case '[':
+			return nil, d.errUnmarshalType("array", s.totalOffset())
+		case '{':
+			return nil, d.errUnmarshalType("object", s.totalOffset())
+		case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
+			return nil, d.errUnmarshalType("number", s.totalOffset())
+		case '"':
+			return stringBytes(s)
+		case 'n':
+			if err := nullBytes(s); err != nil {
+				return nil, err
+			}
+			return nil, nil
+		case nul:
+			if s.read() { // buffered data ran out: refill and look again
+				continue
+			}
+		}
+		break
+	}
+	return nil, errors.ErrInvalidBeginningOfValue(s.char(), s.totalOffset())
+}
+
+func (d *stringDecoder) decodeByte(buf []byte, cursor int64) ([]byte, int64, error) { // returns the string contents and the cursor past the value; contents are nil for JSON null
+	for {
+		switch buf[cursor] {
+		case ' ', '\n', '\t', '\r':
+			cursor++
+		case '[':
+			return nil, 0, d.errUnmarshalType("array", cursor)
+		case '{':
+			return nil, 0, d.errUnmarshalType("object", cursor)
+		case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
+			return nil, 0, d.errUnmarshalType("number", cursor)
+		case '"':
+			cursor++
+			start := cursor
+			b := (*sliceHeader)(unsafe.Pointer(&buf)).data
+			escaped := 0 // number of escape sequences seen; decides whether unescaping is needed
+			for {
+				switch char(b, cursor) {
+				case '\\':
+					escaped++
+					cursor++
+					switch char(b, cursor) {
+					case '"', '\\', '/', 'b', 'f', 'n', 'r', 't':
+						cursor++
+					case 'u':
+						buflen := int64(len(buf))
+						if cursor+5 >= buflen {
+							return nil, 0, errors.ErrUnexpectedEndOfJSON("escaped string", cursor)
+						}
+						for i := int64(1); i <= 4; i++ { // the four bytes after 'u' must be hex digits
+							c := char(b, cursor+i)
+							if !(('0' <= c && c <= '9') || ('a' <= c && c <= 'f') || ('A' <= c && c <= 'F')) {
+								return nil, 0, errors.ErrSyntax(fmt.Sprintf("json: invalid character %c in \\u hexadecimal character escape", c), cursor+i)
+							}
+						}
+						cursor += 5
+					default:
+						return nil, 0, errors.ErrUnexpectedEndOfJSON("escaped string", cursor)
+					}
+					continue
+				case '"':
+					literal := buf[start:cursor]
+					if escaped > 0 {
+						literal = literal[:unescapeString(literal)] // unescape in place and trim to the new length
+					}
+					cursor++
+					return literal, cursor, nil
+				case nul:
+					return nil, 0, errors.ErrUnexpectedEndOfJSON("string", cursor)
+				}
+				cursor++
+			}
+		case 'n':
+			if err := validateNull(buf, cursor); err != nil {
+				return nil, 0, err
+			}
+			cursor += 4
+			return nil, cursor, nil
+		default:
+			return nil, 0, errors.ErrInvalidBeginningOfValue(buf[cursor], cursor)
+		}
+	}
+}
+
+var unescapeMap = [256]byte{ // byte produced by each single-character escape, indexed by the character after the backslash
+	'"':  '"',
+	'\\': '\\',
+	'/':  '/',
+	'b':  '\b',
+	'f':  '\f',
+	'n':  '\n',
+	'r':  '\r',
+	't':  '\t',
+}
+
+func unsafeAdd(ptr unsafe.Pointer, offset int) unsafe.Pointer { // ptr advanced by offset bytes
+	return unsafe.Pointer(uintptr(ptr) + uintptr(offset))
+}
+
+func unescapeString(buf []byte) int { // rewrites the escape sequences in buf in place and returns the resulting length
+	p := (*sliceHeader)(unsafe.Pointer(&buf)).data
+	end := unsafeAdd(p, len(buf))
+	src := unsafeAdd(p, bytes.IndexByte(buf, '\\')) // start at the first backslash; bytes before it are already final
+	dst := src
+	for src != end {
+		c := char(src, 0)
+		if c == '\\' {
+			escapeChar := char(src, 1)
+			if escapeChar != 'u' {
+				*(*byte)(dst) = unescapeMap[escapeChar]
+				src = unsafeAdd(src, 2)
+				dst = unsafeAdd(dst, 1)
+			} else {
+				v1 := hexToInt[char(src, 2)]
+				v2 := hexToInt[char(src, 3)]
+				v3 := hexToInt[char(src, 4)]
+				v4 := hexToInt[char(src, 5)]
+				code := rune((v1 << 12) | (v2 << 8) | (v3 << 4) | v4)
+				if code >= 0xd800 && code < 0xdc00 && uintptr(unsafeAdd(src, 11)) < uintptr(end) { // high surrogate with room for a second escape
+					if char(src, 6) == '\\' && char(src, 7) == 'u' {
+						v1 := hexToInt[char(src, 8)]
+						v2 := hexToInt[char(src, 9)]
+						v3 := hexToInt[char(src, 10)]
+						v4 := hexToInt[char(src, 11)]
+						lo := rune((v1 << 12) | (v2 << 8) | (v3 << 4) | v4)
+						if lo >= 0xdc00 && lo < 0xe000 {
+							code = (code-0xd800)<<10 | (lo - 0xdc00) + 0x10000
+							src = unsafeAdd(src, 6) // step over the first escape; the shared advance below covers the second
+						}
+					}
+				}
+				var b [utf8.UTFMax]byte
+				n := utf8.EncodeRune(b[:], code)
+				switch n { // write the n encoded bytes at dst
+				case 4:
+					*(*byte)(unsafeAdd(dst, 3)) = b[3]
+					fallthrough
+				case 3:
+					*(*byte)(unsafeAdd(dst, 2)) = b[2]
+					fallthrough
+				case 2:
+					*(*byte)(unsafeAdd(dst, 1)) = b[1]
+					fallthrough
+				case 1:
+					*(*byte)(unsafeAdd(dst, 0)) = b[0]
+				}
+				src = unsafeAdd(src, 6)
+				dst = unsafeAdd(dst, n)
+			}
+		} else {
+			*(*byte)(dst) = c
+			src = unsafeAdd(src, 1)
+			dst = unsafeAdd(dst, 1)
+		}
+	}
+	return int(uintptr(dst) - uintptr(p))
+}
diff --git a/vendor/github.com/goccy/go-json/internal/decoder/struct.go b/vendor/github.com/goccy/go-json/internal/decoder/struct.go
new file mode 100644
index 00000000..313da153
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/internal/decoder/struct.go
@@ -0,0 +1,845 @@
+package decoder
+
+import (
+	"fmt"
+	"math"
+	"math/bits"
+	"sort"
+	"strings"
+	"unicode"
+	"unicode/utf16"
+	"unsafe"
+
+	"github.com/goccy/go-json/internal/errors"
+)
+
+type structFieldSet struct { // structFieldSet describes how one JSON object key is decoded into a struct field.
+	dec         Decoder // decoder invoked for the field's value
+	offset      uintptr // added to the struct base pointer to address the field
+	isTaggedKey bool
+	fieldIdx    int    // assigned by tryOptimize; used as the key of the first-win seen set
+	key         string // returned as the key name by the stream bitmap key decoders
+	keyLen      int64  // compared with the scanned key length to reject prefix-only matches
+	err         error  // when non-nil, returned as soon as this field is matched
+}
+
+type structDecoder struct { // structDecoder decodes a JSON object into a struct by looking up each key's structFieldSet.
+	fieldMap           map[string]*structFieldSet // key -> field set, used by decodeKey and decodeKeyStream
+	fieldUniqueNameNum int // number of distinct lower-cased keys, computed in tryOptimize
+	stringDecoder      *stringDecoder // reads raw keys for the non-bitmap lookup
+	structName         string
+	fieldName          string
+	isTriedOptimize    bool // set when tryOptimize gives up on building bitmaps
+	keyBitmapUint8     [][256]uint8 // [key position][byte] -> bit set of candidate fields
+	keyBitmapUint16    [][256]uint16 // same layout, 16 candidate bits wide
+	sortedFieldSets    []*structFieldSet // indexed by bit position of the bitmaps
+	keyDecoder         func(*structDecoder, []byte, int64) (int64, *structFieldSet, error) // buffer-mode key lookup
+	keyStreamDecoder   func(*structDecoder, *Stream) (*structFieldSet, string, error) // stream-mode key lookup
+}
+
+var (
+	largeToSmallTable [256]byte // byte -> same byte with ASCII 'A'-'Z' folded to 'a'-'z'; filled by init
+)
+
+func init() { // init fills largeToSmallTable.
+	for i := 0; i < 256; i++ {
+		c := i
+		if 'A' <= c && c <= 'Z' {
+			c += 'a' - 'A'
+		}
+		largeToSmallTable[i] = byte(c)
+	}
+}
+
+func toASCIILower(s string) string { // toASCIILower returns s with only ASCII upper-case letters lowered; other bytes are kept as is.
+	b := []byte(s)
+	for i := range b {
+		b[i] = largeToSmallTable[b[i]]
+	}
+	return string(b)
+}
+
+func newStructDecoder(structName, fieldName string, fieldMap map[string]*structFieldSet) *structDecoder { // newStructDecoder builds a structDecoder that starts with the map-based key decoders.
+	return &structDecoder{
+		fieldMap:         fieldMap,
+		stringDecoder:    newStringDecoder(structName, fieldName),
+		structName:       structName,
+		fieldName:        fieldName,
+		keyDecoder:       decodeKey, // may be replaced by a bitmap decoder in tryOptimize
+		keyStreamDecoder: decodeKeyStream,
+	}
+}
+
+const (
+	allowOptimizeMaxKeyLen   = 64 // tryOptimize gives up when any lower-cased key is longer than this many bytes
+	allowOptimizeMaxFieldLen = 16 // tryOptimize gives up when there are more distinct lower-cased keys than this
+)
+
+func (d *structDecoder) tryOptimize() { // tryOptimize assigns fieldIdx to every field set and, when the key set is small enough, switches key lookup to the bitmap decoders.
+	fieldUniqueNameMap := map[string]int{} // lower-cased key -> index shared by all its case variants
+	fieldIdx := -1
+	for k, v := range d.fieldMap {
+		lower := strings.ToLower(k)
+		idx, exists := fieldUniqueNameMap[lower]
+		if exists {
+			v.fieldIdx = idx // reuse the index recorded for the first variant
+		} else {
+			fieldIdx++
+			v.fieldIdx = fieldIdx
+			fieldUniqueNameMap[lower] = fieldIdx // store only on first sight so later variants cannot be remapped to another name's index
+		}
+	}
+	d.fieldUniqueNameNum = len(fieldUniqueNameMap)
+
+	if d.isTriedOptimize {
+		return
+	}
+	fieldMap := map[string]*structFieldSet{} // lower-cased key -> field set
+	conflicted := map[string]struct{}{}
+	for k, v := range d.fieldMap {
+		key := strings.ToLower(k)
+		if key != k {
+			if key != toASCIILower(k) { // lowering changed non-ASCII bytes; the byte table cannot reproduce that
+				d.isTriedOptimize = true
+				return
+			}
+			// the same lower-case key already exists (e.g. Hello and HELLO share one lower-case key)
+			if _, exists := conflicted[key]; exists {
+				d.isTriedOptimize = true
+				return
+			}
+			conflicted[key] = struct{}{}
+		}
+		if field, exists := fieldMap[key]; exists {
+			if field != v { // two different fields collapse onto one lower-cased key
+				d.isTriedOptimize = true
+				return
+			}
+		}
+		fieldMap[key] = v
+	}
+
+	if len(fieldMap) > allowOptimizeMaxFieldLen {
+		d.isTriedOptimize = true
+		return
+	}
+
+	var maxKeyLen int
+	sortedKeys := []string{}
+	for key := range fieldMap {
+		keyLen := len(key)
+		if keyLen > allowOptimizeMaxKeyLen {
+			d.isTriedOptimize = true
+			return
+		}
+		if maxKeyLen < keyLen {
+			maxKeyLen = keyLen
+		}
+		sortedKeys = append(sortedKeys, key)
+	}
+	sort.Strings(sortedKeys) // fixes the bit position of every key
+
+	// By allocating one extra capacity than `maxKeyLen`,
+	// it is possible to avoid the process of comparing the index of the key with the length of the bitmap each time.
+	bitmapLen := maxKeyLen + 1
+	if len(sortedKeys) <= 8 { // one bit per key fits in a uint8
+		keyBitmap := make([][256]uint8, bitmapLen)
+		for i, key := range sortedKeys {
+			for j := 0; j < len(key); j++ {
+				c := key[j]
+				keyBitmap[j][c] |= (1 << uint(i)) // key i has byte c at position j
+			}
+			d.sortedFieldSets = append(d.sortedFieldSets, fieldMap[key])
+		}
+		d.keyBitmapUint8 = keyBitmap
+		d.keyDecoder = decodeKeyByBitmapUint8
+		d.keyStreamDecoder = decodeKeyByBitmapUint8Stream
+	} else {
+		keyBitmap := make([][256]uint16, bitmapLen)
+		for i, key := range sortedKeys {
+			for j := 0; j < len(key); j++ {
+				c := key[j]
+				keyBitmap[j][c] |= (1 << uint(i)) // key i has byte c at position j
+			}
+			d.sortedFieldSets = append(d.sortedFieldSets, fieldMap[key])
+		}
+		d.keyBitmapUint16 = keyBitmap
+		d.keyDecoder = decodeKeyByBitmapUint16
+		d.keyStreamDecoder = decodeKeyByBitmapUint16Stream
+	}
+}
+
+// decodeKeyCharByUnicodeRune decodes the hex digits of a '\uXXXX' escape starting at buf[cursor] into UTF-8 bytes.
+func decodeKeyCharByUnicodeRune(buf []byte, cursor int64) ([]byte, int64, error) {
+	const defaultOffset = 4 // number of hex digits read per escape
+	const surrogateOffset = 6
+
+	if cursor+defaultOffset >= int64(len(buf)) {
+		return nil, 0, errors.ErrUnexpectedEndOfJSON("escaped string", cursor)
+	}
+
+	r := unicodeToRune(buf[cursor : cursor+defaultOffset])
+	if utf16.IsSurrogate(r) {
+		cursor += defaultOffset // step past the four digits just read
+		if cursor+surrogateOffset >= int64(len(buf)) || buf[cursor] != '\\' || buf[cursor+1] != 'u' { // no second \u escape follows
+			return []byte(string(unicode.ReplacementChar)), cursor + defaultOffset - 1, nil
+		}
+		cursor += 2 // skip the `\u` of the second escape
+		r2 := unicodeToRune(buf[cursor : cursor+defaultOffset])
+		if r := utf16.DecodeRune(r, r2); r != unicode.ReplacementChar {
+			return []byte(string(r)), cursor + defaultOffset - 1, nil
+		}
+	}
+	return []byte(string(r)), cursor + defaultOffset - 1, nil // returned cursor is the index of the last hex digit read
+}
+
+func decodeKeyCharByEscapedChar(buf []byte, cursor int64) ([]byte, int64, error) { // decodeKeyCharByEscapedChar maps the escape character at buf[cursor] to the bytes it stands for.
+	c := buf[cursor]
+	cursor++ // the cursor returned for one-character escapes is the index after the escape character
+	switch c {
+	case '"':
+		return []byte{'"'}, cursor, nil
+	case '\\':
+		return []byte{'\\'}, cursor, nil
+	case '/':
+		return []byte{'/'}, cursor, nil
+	case 'b':
+		return []byte{'\b'}, cursor, nil
+	case 'f':
+		return []byte{'\f'}, cursor, nil
+	case 'n':
+		return []byte{'\n'}, cursor, nil
+	case 'r':
+		return []byte{'\r'}, cursor, nil
+	case 't':
+		return []byte{'\t'}, cursor, nil
+	case 'u':
+		return decodeKeyCharByUnicodeRune(buf, cursor)
+	}
+	return nil, cursor, nil // unknown escape: no bytes and no error
+}
+
+func decodeKeyByBitmapUint8(d *structDecoder, buf []byte, cursor int64) (int64, *structFieldSet, error) { // decodeKeyByBitmapUint8 scans an object key from buf and resolves it through d.keyBitmapUint8.
+	var (
+		curBit uint8 = math.MaxUint8 // one bit per candidate field; narrowed by every key byte
+	)
+	b := (*sliceHeader)(unsafe.Pointer(&buf)).data
+	for {
+		switch char(b, cursor) {
+		case ' ', '\n', '\t', '\r':
+			cursor++
+		case '"':
+			cursor++
+			c := char(b, cursor)
+			switch c {
+			case '"': // empty key
+				cursor++
+				return cursor, nil, nil
+			case nul:
+				return 0, nil, errors.ErrUnexpectedEndOfJSON("string", cursor)
+			}
+			keyIdx := 0 // position within the decoded key
+			bitmap := d.keyBitmapUint8
+			start := cursor
+			for {
+				c := char(b, cursor)
+				switch c {
+				case '"':
+					fieldSetIndex := bits.TrailingZeros8(curBit) // lowest surviving candidate
+					field := d.sortedFieldSets[fieldSetIndex]
+					keyLen := cursor - start
+					cursor++
+					if keyLen < field.keyLen {
+						// early match
+						return cursor, nil, nil
+					}
+					return cursor, field, nil
+				case nul:
+					return 0, nil, errors.ErrUnexpectedEndOfJSON("string", cursor)
+				case '\\':
+					cursor++
+					chars, nextCursor, err := decodeKeyCharByEscapedChar(buf, cursor)
+					if err != nil {
+						return 0, nil, err
+					}
+					for _, c := range chars {
+						curBit &= bitmap[keyIdx][largeToSmallTable[c]]
+						if curBit == 0 {
+							return decodeKeyNotFound(b, cursor)
+						}
+						keyIdx++
+					}
+					cursor = nextCursor
+				default:
+					curBit &= bitmap[keyIdx][largeToSmallTable[c]] // keep only fields whose key has this byte here, ASCII case folded
+					if curBit == 0 {
+						return decodeKeyNotFound(b, cursor) // no candidate left: skip the rest of the key
+					}
+					keyIdx++
+				}
+				cursor++
+			}
+		default:
+			return cursor, nil, errors.ErrInvalidBeginningOfValue(char(b, cursor), cursor)
+		}
+	}
+}
+
+func decodeKeyByBitmapUint16(d *structDecoder, buf []byte, cursor int64) (int64, *structFieldSet, error) { // decodeKeyByBitmapUint16 scans an object key from buf and resolves it through d.keyBitmapUint16.
+	var (
+		curBit uint16 = math.MaxUint16 // one bit per candidate field; narrowed by every key byte
+	)
+	b := (*sliceHeader)(unsafe.Pointer(&buf)).data
+	for {
+		switch char(b, cursor) {
+		case ' ', '\n', '\t', '\r':
+			cursor++
+		case '"':
+			cursor++
+			c := char(b, cursor)
+			switch c {
+			case '"': // empty key
+				cursor++
+				return cursor, nil, nil
+			case nul:
+				return 0, nil, errors.ErrUnexpectedEndOfJSON("string", cursor)
+			}
+			keyIdx := 0 // position within the decoded key
+			bitmap := d.keyBitmapUint16
+			start := cursor
+			for {
+				c := char(b, cursor)
+				switch c {
+				case '"':
+					fieldSetIndex := bits.TrailingZeros16(curBit) // lowest surviving candidate
+					field := d.sortedFieldSets[fieldSetIndex]
+					keyLen := cursor - start
+					cursor++
+					if keyLen < field.keyLen {
+						// early match
+						return cursor, nil, nil
+					}
+					return cursor, field, nil
+				case nul:
+					return 0, nil, errors.ErrUnexpectedEndOfJSON("string", cursor)
+				case '\\':
+					cursor++
+					chars, nextCursor, err := decodeKeyCharByEscapedChar(buf, cursor)
+					if err != nil {
+						return 0, nil, err
+					}
+					for _, c := range chars {
+						curBit &= bitmap[keyIdx][largeToSmallTable[c]]
+						if curBit == 0 {
+							return decodeKeyNotFound(b, cursor)
+						}
+						keyIdx++
+					}
+					cursor = nextCursor
+				default:
+					curBit &= bitmap[keyIdx][largeToSmallTable[c]] // keep only fields whose key has this byte here, ASCII case folded
+					if curBit == 0 {
+						return decodeKeyNotFound(b, cursor) // no candidate left: skip the rest of the key
+					}
+					keyIdx++
+				}
+				cursor++
+			}
+		default:
+			return cursor, nil, errors.ErrInvalidBeginningOfValue(char(b, cursor), cursor)
+		}
+	}
+}
+
+func decodeKeyNotFound(b unsafe.Pointer, cursor int64) (int64, *structFieldSet, error) { // decodeKeyNotFound skips to the end of a key that matched no field and returns a nil field set.
+	for {
+		cursor++
+		switch char(b, cursor) {
+		case '"':
+			cursor++
+			return cursor, nil, nil // cursor is just past the closing quote
+		case '\\':
+			cursor++ // step onto the escaped character so an escaped quote does not end the key
+			if char(b, cursor) == nul {
+				return 0, nil, errors.ErrUnexpectedEndOfJSON("string", cursor)
+			}
+		case nul:
+			return 0, nil, errors.ErrUnexpectedEndOfJSON("string", cursor)
+		}
+	}
+}
+
+func decodeKey(d *structDecoder, buf []byte, cursor int64) (int64, *structFieldSet, error) { // decodeKey reads a key with the string decoder and looks it up in d.fieldMap; unknown keys yield a nil field set.
+	key, c, err := d.stringDecoder.decodeByte(buf, cursor)
+	if err != nil {
+		return 0, nil, err
+	}
+	cursor = c
+	k := *(*string)(unsafe.Pointer(&key)) // reinterpret the key bytes as a string without copying
+	field, exists := d.fieldMap[k]
+	if !exists {
+		return cursor, nil, nil
+	}
+	return cursor, field, nil
+}
+
+func decodeKeyByBitmapUint8Stream(d *structDecoder, s *Stream) (*structFieldSet, string, error) { // decodeKeyByBitmapUint8Stream scans an object key from s and resolves it through d.keyBitmapUint8.
+	var (
+		curBit uint8 = math.MaxUint8 // one bit per candidate field; narrowed by every key byte
+	)
+	_, cursor, p := s.stat()
+	for {
+		switch char(p, cursor) {
+		case ' ', '\n', '\t', '\r':
+			cursor++
+		case nul:
+			s.cursor = cursor
+			if s.read() { // more input arrived: reload the stream state and retry
+				_, cursor, p = s.stat()
+				continue
+			}
+			return nil, "", errors.ErrInvalidBeginningOfValue(char(p, cursor), s.totalOffset())
+		case '"':
+			cursor++
+		FIRST_CHAR:
+			start := cursor
+			switch char(p, cursor) {
+			case '"': // empty key
+				cursor++
+				s.cursor = cursor
+				return nil, "", nil
+			case nul:
+				s.cursor = cursor
+				if s.read() {
+					_, cursor, p = s.stat()
+					goto FIRST_CHAR
+				}
+				return nil, "", errors.ErrUnexpectedEndOfJSON("string", s.totalOffset())
+			}
+			keyIdx := 0 // position within the decoded key
+			bitmap := d.keyBitmapUint8
+			for {
+				c := char(p, cursor)
+				switch c {
+				case '"':
+					fieldSetIndex := bits.TrailingZeros8(curBit) // lowest surviving candidate
+					field := d.sortedFieldSets[fieldSetIndex]
+					keyLen := cursor - start
+					cursor++
+					s.cursor = cursor
+					if keyLen < field.keyLen {
+						// early match
+						return nil, field.key, nil
+					}
+					return field, field.key, nil
+				case nul:
+					s.cursor = cursor
+					if s.read() {
+						_, cursor, p = s.stat()
+						continue
+					}
+					return nil, "", errors.ErrUnexpectedEndOfJSON("string", s.totalOffset())
+				case '\\':
+					s.cursor = cursor + 1 // skip '\' char
+					chars, err := decodeKeyCharByEscapeCharStream(s)
+					if err != nil {
+						return nil, "", err
+					}
+					cursor = s.cursor // resume where the escape decoder left the stream
+					for _, c := range chars {
+						curBit &= bitmap[keyIdx][largeToSmallTable[c]]
+						if curBit == 0 {
+							s.cursor = cursor
+							return decodeKeyNotFoundStream(s, start)
+						}
+						keyIdx++
+					}
+				default:
+					curBit &= bitmap[keyIdx][largeToSmallTable[c]] // keep only fields whose key has this byte here, ASCII case folded
+					if curBit == 0 {
+						s.cursor = cursor
+						return decodeKeyNotFoundStream(s, start) // no candidate left: skip the rest of the key
+					}
+					keyIdx++
+				}
+				cursor++
+			}
+		default:
+			return nil, "", errors.ErrInvalidBeginningOfValue(char(p, cursor), s.totalOffset())
+		}
+	}
+}
+
+func decodeKeyByBitmapUint16Stream(d *structDecoder, s *Stream) (*structFieldSet, string, error) { // decodeKeyByBitmapUint16Stream scans an object key from s and resolves it through d.keyBitmapUint16.
+	var (
+		curBit uint16 = math.MaxUint16 // one bit per candidate field; narrowed by every key byte
+	)
+	_, cursor, p := s.stat()
+	for {
+		switch char(p, cursor) {
+		case ' ', '\n', '\t', '\r':
+			cursor++
+		case nul:
+			s.cursor = cursor
+			if s.read() { // more input arrived: reload the stream state and retry
+				_, cursor, p = s.stat()
+				continue
+			}
+			return nil, "", errors.ErrInvalidBeginningOfValue(char(p, cursor), s.totalOffset())
+		case '"':
+			cursor++
+		FIRST_CHAR:
+			start := cursor
+			switch char(p, cursor) {
+			case '"': // empty key
+				cursor++
+				s.cursor = cursor
+				return nil, "", nil
+			case nul:
+				s.cursor = cursor
+				if s.read() {
+					_, cursor, p = s.stat()
+					goto FIRST_CHAR
+				}
+				return nil, "", errors.ErrUnexpectedEndOfJSON("string", s.totalOffset())
+			}
+			keyIdx := 0 // position within the decoded key
+			bitmap := d.keyBitmapUint16
+			for {
+				c := char(p, cursor)
+				switch c {
+				case '"':
+					fieldSetIndex := bits.TrailingZeros16(curBit) // lowest surviving candidate
+					field := d.sortedFieldSets[fieldSetIndex]
+					keyLen := cursor - start
+					cursor++
+					s.cursor = cursor
+					if keyLen < field.keyLen {
+						// early match
+						return nil, field.key, nil
+					}
+					return field, field.key, nil
+				case nul:
+					s.cursor = cursor
+					if s.read() {
+						_, cursor, p = s.stat()
+						continue
+					}
+					return nil, "", errors.ErrUnexpectedEndOfJSON("string", s.totalOffset())
+				case '\\':
+					s.cursor = cursor + 1 // skip '\' char
+					chars, err := decodeKeyCharByEscapeCharStream(s)
+					if err != nil {
+						return nil, "", err
+					}
+					cursor = s.cursor // resume where the escape decoder left the stream
+					for _, c := range chars {
+						curBit &= bitmap[keyIdx][largeToSmallTable[c]]
+						if curBit == 0 {
+							s.cursor = cursor
+							return decodeKeyNotFoundStream(s, start)
+						}
+						keyIdx++
+					}
+				default:
+					curBit &= bitmap[keyIdx][largeToSmallTable[c]] // keep only fields whose key has this byte here, ASCII case folded
+					if curBit == 0 {
+						s.cursor = cursor
+						return decodeKeyNotFoundStream(s, start) // no candidate left: skip the rest of the key
+					}
+					keyIdx++
+				}
+				cursor++
+			}
+		default:
+			return nil, "", errors.ErrInvalidBeginningOfValue(char(p, cursor), s.totalOffset())
+		}
+	}
+}
+
+// decodeKeyCharByUnicodeRuneStream decodes the hex digits of a '\uXXXX' escape at s.cursor (plus a following low surrogate escape, if any) into UTF-8 bytes.
+func decodeKeyCharByUnicodeRuneStream(s *Stream) ([]byte, error) {
+	const defaultOffset = 4 // number of hex digits in one escape
+	const surrogateOffset = 6 // byte length of a whole `\uXXXX` escape
+
+	if s.cursor+defaultOffset >= s.length {
+		if !s.read() {
+			return nil, errors.ErrInvalidCharacter(s.char(), "escaped unicode char", s.totalOffset())
+		}
+	}
+
+	r := unicodeToRune(s.buf[s.cursor : s.cursor+defaultOffset])
+	if utf16.IsSurrogate(r) {
+		s.cursor += defaultOffset // now on the byte after the first escape's digits
+		if s.cursor+surrogateOffset >= s.length {
+			s.read() // result unused: the length check below covers a failed read
+		}
+		if s.cursor+surrogateOffset >= s.length || s.buf[s.cursor] != '\\' || s.buf[s.cursor+1] != 'u' {
+			s.cursor += defaultOffset - 1
+			return []byte(string(unicode.ReplacementChar)), nil
+		}
+		r2 := unicodeToRune(s.buf[s.cursor+2 : s.cursor+surrogateOffset]) // the four digits after the `\u` at s.cursor (was an empty slice)
+		if r := utf16.DecodeRune(r, r2); r != unicode.ReplacementChar {
+			s.cursor += surrogateOffset - 1 // stop on the last digit of the second escape, as decodeKeyCharByUnicodeRune does
+			return []byte(string(r)), nil
+		}
+	}
+	s.cursor += defaultOffset - 1 // stop on the last digit read; the caller advances past it
+	return []byte(string(r)), nil
+}
+
+func decodeKeyCharByEscapeCharStream(s *Stream) ([]byte, error) { // decodeKeyCharByEscapeCharStream maps the escape character at s.cursor to the bytes it stands for.
+	c := s.buf[s.cursor]
+	s.cursor++
+RETRY:
+	switch c {
+	case '"':
+		return []byte{'"'}, nil
+	case '\\':
+		return []byte{'\\'}, nil
+	case '/':
+		return []byte{'/'}, nil
+	case 'b':
+		return []byte{'\b'}, nil
+	case 'f':
+		return []byte{'\f'}, nil
+	case 'n':
+		return []byte{'\n'}, nil
+	case 'r':
+		return []byte{'\r'}, nil
+	case 't':
+		return []byte{'\t'}, nil
+	case 'u':
+		return decodeKeyCharByUnicodeRuneStream(s)
+	case nul:
+		if !s.read() {
+			return nil, errors.ErrInvalidCharacter(s.char(), "escaped char", s.totalOffset())
+		}
+		goto RETRY // NOTE(review): c is not re-read before retrying, so this branch repeats until s.read() fails - confirm intent
+	default:
+		return nil, errors.ErrUnexpectedEndOfJSON("struct field", s.totalOffset())
+	}
+}
+
+func decodeKeyNotFoundStream(s *Stream, start int64) (*structFieldSet, string, error) { // decodeKeyNotFoundStream skips to the end of an unmatched key and returns its raw text from start with a nil field set.
+	buf, cursor, p := s.stat()
+	for {
+		cursor++
+		switch char(p, cursor) {
+		case '"':
+			b := buf[start:cursor]
+			key := *(*string)(unsafe.Pointer(&b)) // view of the stream buffer, not a copy
+			cursor++
+			s.cursor = cursor
+			return nil, key, nil
+		case '\\':
+			cursor++ // step onto the escaped character so an escaped quote does not end the key
+			if char(p, cursor) == nul {
+				s.cursor = cursor
+				if !s.read() {
+					return nil, "", errors.ErrUnexpectedEndOfJSON("string", s.totalOffset())
+				}
+				buf, cursor, p = s.statForRetry()
+			}
+		case nul:
+			s.cursor = cursor
+			if !s.read() {
+				return nil, "", errors.ErrUnexpectedEndOfJSON("string", s.totalOffset())
+			}
+			buf, cursor, p = s.statForRetry() // reload the stream state after the read
+		}
+	}
+}
+
+func decodeKeyStream(d *structDecoder, s *Stream) (*structFieldSet, string, error) { // decodeKeyStream reads a key with the string decoder and looks it up in d.fieldMap.
+	key, err := d.stringDecoder.decodeStreamByte(s)
+	if err != nil {
+		return nil, "", err
+	}
+	k := *(*string)(unsafe.Pointer(&key)) // reinterpret the key bytes as a string without copying
+	return d.fieldMap[k], k, nil // the field set is nil when the key is unknown
+}
+
+func (d *structDecoder) DecodeStream(s *Stream, depth int64, p unsafe.Pointer) error { // DecodeStream decodes a JSON object (or null) from s into the struct at p.
+	depth++
+	if depth > maxDecodeNestingDepth {
+		return errors.ErrExceededMaxDepth(s.char(), s.cursor)
+	}
+
+	c := s.skipWhiteSpace()
+	switch c {
+	case 'n':
+		if err := nullBytes(s); err != nil {
+			return err
+		}
+		return nil // null leaves the struct untouched
+	default:
+		if s.char() != '{' {
+			return errors.ErrInvalidBeginningOfValue(s.char(), s.totalOffset())
+		}
+	}
+	s.cursor++
+	if s.skipWhiteSpace() == '}' { // empty object
+		s.cursor++
+		return nil
+	}
+	var (
+		seenFields   map[int]struct{} // fieldIdx values already decoded; only used with FirstWinOption
+		seenFieldNum int
+	)
+	firstWin := (s.Option.Flags & FirstWinOption) != 0
+	if firstWin {
+		seenFields = make(map[int]struct{}, d.fieldUniqueNameNum)
+	}
+	for {
+		s.reset()
+		field, key, err := d.keyStreamDecoder(d, s)
+		if err != nil {
+			return err
+		}
+		if s.skipWhiteSpace() != ':' {
+			return errors.ErrExpected("colon after object key", s.totalOffset())
+		}
+		s.cursor++
+		if field != nil {
+			if field.err != nil {
+				return field.err
+			}
+			if firstWin {
+				if _, exists := seenFields[field.fieldIdx]; exists { // repeated key: keep the first value
+					if err := s.skipValue(depth); err != nil {
+						return err
+					}
+				} else {
+					if err := field.dec.DecodeStream(s, depth, unsafe.Pointer(uintptr(p)+field.offset)); err != nil {
+						return err
+					}
+					seenFieldNum++
+					if d.fieldUniqueNameNum <= seenFieldNum { // every distinct field has been decoded: skip the remainder
+						return s.skipObject(depth)
+					}
+					seenFields[field.fieldIdx] = struct{}{}
+				}
+			} else {
+				if err := field.dec.DecodeStream(s, depth, unsafe.Pointer(uintptr(p)+field.offset)); err != nil {
+					return err
+				}
+			}
+		} else if s.DisallowUnknownFields {
+			return fmt.Errorf("json: unknown field %q", key)
+		} else {
+			if err := s.skipValue(depth); err != nil { // unknown key: discard its value
+				return err
+			}
+		}
+		c := s.skipWhiteSpace()
+		if c == '}' {
+			s.cursor++
+			return nil
+		}
+		if c != ',' {
+			return errors.ErrExpected("comma after object element", s.totalOffset())
+		}
+		s.cursor++
+	}
+}
+
+func (d *structDecoder) Decode(ctx *RuntimeContext, cursor, depth int64, p unsafe.Pointer) (int64, error) { // Decode decodes a JSON object (or null) from ctx.Buf at cursor into the struct at p and returns the cursor after it.
+	buf := ctx.Buf
+	depth++
+	if depth > maxDecodeNestingDepth {
+		return 0, errors.ErrExceededMaxDepth(buf[cursor], cursor)
+	}
+	buflen := int64(len(buf))
+	cursor = skipWhiteSpace(buf, cursor)
+	b := (*sliceHeader)(unsafe.Pointer(&buf)).data
+	switch char(b, cursor) {
+	case 'n':
+		if err := validateNull(buf, cursor); err != nil {
+			return 0, err
+		}
+		cursor += 4 // length of "null"
+		return cursor, nil
+	case '{':
+	default:
+		return 0, errors.ErrInvalidBeginningOfValue(char(b, cursor), cursor)
+	}
+	cursor++
+	cursor = skipWhiteSpace(buf, cursor)
+	if buf[cursor] == '}' { // empty object
+		cursor++
+		return cursor, nil
+	}
+	var (
+		seenFields   map[int]struct{} // fieldIdx values already decoded; only used with FirstWinOption
+		seenFieldNum int
+	)
+	firstWin := (ctx.Option.Flags & FirstWinOption) != 0
+	if firstWin {
+		seenFields = make(map[int]struct{}, d.fieldUniqueNameNum)
+	}
+	for {
+		c, field, err := d.keyDecoder(d, buf, cursor)
+		if err != nil {
+			return 0, err
+		}
+		cursor = skipWhiteSpace(buf, c)
+		if char(b, cursor) != ':' {
+			return 0, errors.ErrExpected("colon after object key", cursor)
+		}
+		cursor++
+		if cursor >= buflen {
+			return 0, errors.ErrExpected("object value after colon", cursor)
+		}
+		if field != nil {
+			if field.err != nil {
+				return 0, field.err
+			}
+			if firstWin {
+				if _, exists := seenFields[field.fieldIdx]; exists { // repeated key: keep the first value
+					c, err := skipValue(buf, cursor, depth)
+					if err != nil {
+						return 0, err
+					}
+					cursor = c
+				} else {
+					c, err := field.dec.Decode(ctx, cursor, depth, unsafe.Pointer(uintptr(p)+field.offset))
+					if err != nil {
+						return 0, err
+					}
+					cursor = c
+					seenFieldNum++
+					if d.fieldUniqueNameNum <= seenFieldNum { // every distinct field has been decoded: skip the remainder
+						return skipObject(buf, cursor, depth)
+					}
+					seenFields[field.fieldIdx] = struct{}{}
+				}
+			} else {
+				c, err := field.dec.Decode(ctx, cursor, depth, unsafe.Pointer(uintptr(p)+field.offset))
+				if err != nil {
+					return 0, err
+				}
+				cursor = c
+			}
+		} else {
+			c, err := skipValue(buf, cursor, depth) // unknown key: discard its value
+			if err != nil {
+				return 0, err
+			}
+			cursor = c
+		}
+		cursor = skipWhiteSpace(buf, cursor)
+		if char(b, cursor) == '}' {
+			cursor++
+			return cursor, nil
+		}
+		if char(b, cursor) != ',' {
+			return 0, errors.ErrExpected("comma after object element", cursor)
+		}
+		cursor++
+	}
+}
+
+func (d *structDecoder) DecodePath(ctx *RuntimeContext, cursor, depth int64) ([][]byte, int64, error) { // DecodePath is unsupported for structs and always returns an error.
+	return nil, 0, fmt.Errorf("json: struct decoder does not support decode path")
+}
diff --git a/vendor/github.com/goccy/go-json/internal/decoder/type.go b/vendor/github.com/goccy/go-json/internal/decoder/type.go
new file mode 100644
index 00000000..beaf3ab8
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/internal/decoder/type.go
@@ -0,0 +1,30 @@
+package decoder
+
+import (
+	"context"
+	"encoding"
+	"encoding/json"
+	"reflect"
+	"unsafe"
+)
+
+type Decoder interface { // Decoder is the set of decode entry points every value decoder in this package provides.
+	Decode(*RuntimeContext, int64, int64, unsafe.Pointer) (int64, error) // buffer mode: (ctx, cursor, depth, dst) -> cursor after the value
+	DecodePath(*RuntimeContext, int64, int64) ([][]byte, int64, error) // (ctx, cursor, depth); several decoders here reject it
+	DecodeStream(*Stream, int64, unsafe.Pointer) error // stream mode: (stream, depth, dst)
+}
+
+const (
+	nul                   = '\000' // byte the scanners treat as end of the available input
+	maxDecodeNestingDepth = 10000 // nesting depth beyond which decoding fails with ErrExceededMaxDepth
+)
+
+type unmarshalerContext interface { // unmarshalerContext is the context-aware counterpart of json.Unmarshaler.
+	UnmarshalJSON(context.Context, []byte) error
+}
+
+var ( // reflect.Type values of the unmarshaler interfaces
+	unmarshalJSONType        = reflect.TypeOf((*json.Unmarshaler)(nil)).Elem()
+	unmarshalJSONContextType = reflect.TypeOf((*unmarshalerContext)(nil)).Elem()
+	unmarshalTextType        = reflect.TypeOf((*encoding.TextUnmarshaler)(nil)).Elem()
+)
diff --git a/vendor/github.com/goccy/go-json/internal/decoder/uint.go b/vendor/github.com/goccy/go-json/internal/decoder/uint.go
new file mode 100644
index 00000000..4131731b
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/internal/decoder/uint.go
@@ -0,0 +1,194 @@
+package decoder
+
+import (
+	"fmt"
+	"reflect"
+	"unsafe"
+
+	"github.com/goccy/go-json/internal/errors"
+	"github.com/goccy/go-json/internal/runtime"
+)
+
+type uintDecoder struct { // uintDecoder decodes a JSON number into an unsigned integer.
+	typ        *runtime.Type // target type, reported in type errors
+	kind       reflect.Kind // typ.Kind(), used for the range checks
+	op         func(unsafe.Pointer, uint64) // stores the parsed value at the destination pointer
+	structName string
+	fieldName  string
+}
+
+func newUintDecoder(typ *runtime.Type, structName, fieldName string, op func(unsafe.Pointer, uint64)) *uintDecoder { // newUintDecoder builds a uintDecoder for typ that stores values through op.
+	return &uintDecoder{
+		typ:        typ,
+		kind:       typ.Kind(), // cached so decoding does not call Kind each time
+		op:         op,
+		structName: structName,
+		fieldName:  fieldName,
+	}
+}
+
+func (d *uintDecoder) typeError(buf []byte, offset int64) *errors.UnmarshalTypeError { // typeError reports that the text in buf cannot be stored in d's target type.
+	return &errors.UnmarshalTypeError{
+		Value:  fmt.Sprintf("number %s", string(buf)),
+		Type:   runtime.RType2Type(d.typ),
+		Offset: offset,
+	}
+}
+
+var (
+	pow10u64 = [...]uint64{ // pow10u64[i] == 10^i
+		1e00, 1e01, 1e02, 1e03, 1e04, 1e05, 1e06, 1e07, 1e08, 1e09,
+		1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
+	}
+	pow10u64Len = len(pow10u64) // 20: the largest digit count parseUint accepts
+)
+
+func (d *uintDecoder) parseUint(b []byte) (uint64, error) { // parseUint converts the ASCII decimal digits in b to a uint64 and returns an error instead of wrapping on overflow.
+	if len(b) > pow10u64Len { // more digits than any uint64 can have
+		return 0, fmt.Errorf("invalid length of number")
+	}
+	sum := uint64(0)
+	for _, ch := range b {
+		digit := uint64(ch) - '0' // callers pass digit runs only
+		if sum > (^uint64(0)-digit)/10 { // sum*10+digit would exceed the uint64 maximum
+			return 0, fmt.Errorf("number overflows uint64")
+		}
+		sum = sum*10 + digit
+	}
+	return sum, nil
+}
+
+func (d *uintDecoder) decodeStreamByte(s *Stream) ([]byte, error) { // decodeStreamByte returns the digit bytes of the next number in s, or nil bytes for null.
+	for {
+		switch s.char() {
+		case ' ', '\n', '\t', '\r':
+			s.cursor++
+			continue
+		case '0': // a leading zero is a complete number on its own
+			s.cursor++
+			return numZeroBuf, nil
+		case '1', '2', '3', '4', '5', '6', '7', '8', '9':
+			start := s.cursor
+			for {
+				s.cursor++
+				if numTable[s.char()] {
+					continue
+				} else if s.char() == nul {
+					if s.read() {
+						s.cursor-- // for retry current character
+						continue
+					}
+				}
+				break
+			}
+			num := s.buf[start:s.cursor]
+			return num, nil
+		case 'n':
+			if err := nullBytes(s); err != nil {
+				return nil, err
+			}
+			return nil, nil // nil bytes tell the caller to leave the destination untouched
+		case nul:
+			if s.read() {
+				continue
+			}
+		default:
+			return nil, d.typeError([]byte{s.char()}, s.totalOffset())
+		}
+		break // reached only when the input ended
+	}
+	return nil, errors.ErrUnexpectedEndOfJSON("number(unsigned integer)", s.totalOffset())
+}
+
+func (d *uintDecoder) decodeByte(buf []byte, cursor int64) ([]byte, int64, error) { // decodeByte returns the digit bytes of the number at buf[cursor] and the cursor after it; null yields nil bytes.
+	for {
+		switch buf[cursor] {
+		case ' ', '\n', '\t', '\r':
+			cursor++
+			continue
+		case '0': // a leading zero is a complete number on its own
+			cursor++
+			return numZeroBuf, cursor, nil
+		case '1', '2', '3', '4', '5', '6', '7', '8', '9':
+			start := cursor
+			cursor++
+			for numTable[buf[cursor]] { // presumably buf ends with a non-digit terminator that stops this scan - confirm
+				cursor++
+			}
+			num := buf[start:cursor]
+			return num, cursor, nil
+		case 'n':
+			if err := validateNull(buf, cursor); err != nil {
+				return nil, 0, err
+			}
+			cursor += 4 // length of "null"
+			return nil, cursor, nil
+		default:
+			return nil, 0, d.typeError([]byte{buf[cursor]}, cursor)
+		}
+	}
+}
+
+func (d *uintDecoder) DecodeStream(s *Stream, depth int64, p unsafe.Pointer) error { // DecodeStream reads an unsigned integer from s, range-checks it against d.kind and stores it at p through d.op.
+	bytes, err := d.decodeStreamByte(s)
+	if err != nil {
+		return err
+	}
+	if bytes == nil { // null: leave the destination untouched
+		return nil
+	}
+	u64, err := d.parseUint(bytes)
+	if err != nil {
+		return d.typeError(bytes, s.totalOffset())
+	}
+	switch d.kind { // only the kinds narrower than 64 bits are range-checked here
+	case reflect.Uint8:
+		if (1 << 8) <= u64 {
+			return d.typeError(bytes, s.totalOffset())
+		}
+	case reflect.Uint16:
+		if (1 << 16) <= u64 {
+			return d.typeError(bytes, s.totalOffset())
+		}
+	case reflect.Uint32:
+		if (1 << 32) <= u64 {
+			return d.typeError(bytes, s.totalOffset())
+		}
+	}
+	d.op(p, u64)
+	return nil
+}
+
+func (d *uintDecoder) Decode(ctx *RuntimeContext, cursor, depth int64, p unsafe.Pointer) (int64, error) { // Decode reads an unsigned integer from ctx.Buf at cursor, range-checks it against d.kind, stores it at p and returns the cursor after it.
+	bytes, c, err := d.decodeByte(ctx.Buf, cursor)
+	if err != nil {
+		return 0, err
+	}
+	if bytes == nil { // null: leave the destination untouched
+		return c, nil
+	}
+	cursor = c
+	u64, err := d.parseUint(bytes)
+	if err != nil {
+		return 0, d.typeError(bytes, cursor)
+	}
+	switch d.kind { // only the kinds narrower than 64 bits are range-checked here
+	case reflect.Uint8:
+		if (1 << 8) <= u64 {
+			return 0, d.typeError(bytes, cursor)
+		}
+	case reflect.Uint16:
+		if (1 << 16) <= u64 {
+			return 0, d.typeError(bytes, cursor)
+		}
+	case reflect.Uint32:
+		if (1 << 32) <= u64 {
+			return 0, d.typeError(bytes, cursor)
+		}
+	}
+	d.op(p, u64)
+	return cursor, nil
+}
+
+func (d *uintDecoder) DecodePath(ctx *RuntimeContext, cursor, depth int64) ([][]byte, int64, error) { // DecodePath is unsupported for unsigned integers and always returns an error.
+	return nil, 0, fmt.Errorf("json: uint decoder does not support decode path")
+}
diff --git a/vendor/github.com/goccy/go-json/internal/decoder/unmarshal_json.go b/vendor/github.com/goccy/go-json/internal/decoder/unmarshal_json.go
new file mode 100644
index 00000000..4cd6dbd5
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/internal/decoder/unmarshal_json.go
@@ -0,0 +1,104 @@
+package decoder
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"unsafe"
+
+	"github.com/goccy/go-json/internal/errors"
+	"github.com/goccy/go-json/internal/runtime"
+)
+
+type unmarshalJSONDecoder struct { // unmarshalJSONDecoder hands the raw JSON value to the target's UnmarshalJSON method.
+	typ        *runtime.Type // type used to rebuild an interface value around the destination pointer
+	structName string // copied into UnmarshalTypeError.Struct by annotateError
+	fieldName  string // copied into UnmarshalTypeError.Field by annotateError
+}
+
+func newUnmarshalJSONDecoder(typ *runtime.Type, structName, fieldName string) *unmarshalJSONDecoder { // newUnmarshalJSONDecoder builds an unmarshalJSONDecoder for typ.
+	return &unmarshalJSONDecoder{
+		typ:        typ,
+		structName: structName,
+		fieldName:  fieldName,
+	}
+}
+
+func (d *unmarshalJSONDecoder) annotateError(cursor int64, err error) { // annotateError fills location details into err in place; other error types are left unchanged.
+	switch e := err.(type) {
+	case *errors.UnmarshalTypeError:
+		e.Struct = d.structName
+		e.Field = d.fieldName
+	case *errors.SyntaxError:
+		e.Offset = cursor
+	}
+}
+
+func (d *unmarshalJSONDecoder) DecodeStream(s *Stream, depth int64, p unsafe.Pointer) error { // DecodeStream copies the next JSON value out of s and passes it to the UnmarshalJSON method of the value at p.
+	s.skipWhiteSpace()
+	start := s.cursor
+	if err := s.skipValue(depth); err != nil {
+		return err
+	}
+	src := s.buf[start:s.cursor]
+	dst := make([]byte, len(src))
+	copy(dst, src) // the callee gets its own copy, not a view of the stream buffer
+
+	v := *(*interface{})(unsafe.Pointer(&emptyInterface{ // rebuild an interface value from the type and pointer
+		typ: d.typ,
+		ptr: p,
+	}))
+	switch v := v.(type) {
+	case unmarshalerContext:
+		var ctx context.Context
+		if (s.Option.Flags & ContextOption) != 0 {
+			ctx = s.Option.Context
+		} else {
+			ctx = context.Background()
+		}
+		if err := v.UnmarshalJSON(ctx, dst); err != nil {
+			d.annotateError(s.cursor, err)
+			return err
+		}
+	case json.Unmarshaler:
+		if err := v.UnmarshalJSON(dst); err != nil {
+			d.annotateError(s.cursor, err)
+			return err
+		}
+	}
+	return nil // a value implementing neither interface is skipped without error
+}
+
+func (d *unmarshalJSONDecoder) Decode(ctx *RuntimeContext, cursor, depth int64, p unsafe.Pointer) (int64, error) { // Decode copies the JSON value at cursor out of ctx.Buf, passes it to UnmarshalJSON on the value at p and returns the cursor after it.
+	buf := ctx.Buf
+	cursor = skipWhiteSpace(buf, cursor)
+	start := cursor
+	end, err := skipValue(buf, cursor, depth)
+	if err != nil {
+		return 0, err
+	}
+	src := buf[start:end]
+	dst := make([]byte, len(src))
+	copy(dst, src) // the callee gets its own copy, not a view of the input buffer
+
+	v := *(*interface{})(unsafe.Pointer(&emptyInterface{ // rebuild an interface value from the type and pointer
+		typ: d.typ,
+		ptr: p,
+	}))
+	if (ctx.Option.Flags & ContextOption) != 0 { // NOTE(review): both assertions below are unchecked, unlike the type switch in DecodeStream; they panic if the value lacks the asserted interface - confirm
+		if err := v.(unmarshalerContext).UnmarshalJSON(ctx.Option.Context, dst); err != nil {
+			d.annotateError(cursor, err)
+			return 0, err
+		}
+	} else {
+		if err := v.(json.Unmarshaler).UnmarshalJSON(dst); err != nil {
+			d.annotateError(cursor, err)
+			return 0, err
+		}
+	}
+	return end, nil
+}
+
+func (d *unmarshalJSONDecoder) DecodePath(ctx *RuntimeContext, cursor, depth int64) ([][]byte, int64, error) { // DecodePath is unsupported for this decoder and always returns an error.
+	return nil, 0, fmt.Errorf("json: unmarshal json decoder does not support decode path")
+}
diff --git a/vendor/github.com/goccy/go-json/internal/decoder/unmarshal_text.go b/vendor/github.com/goccy/go-json/internal/decoder/unmarshal_text.go
new file mode 100644
index 00000000..d711d0f8
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/internal/decoder/unmarshal_text.go
@@ -0,0 +1,285 @@
+package decoder
+
+import (
+	"bytes"
+	"encoding"
+	"fmt"
+	"unicode"
+	"unicode/utf16"
+	"unicode/utf8"
+	"unsafe"
+
+	"github.com/goccy/go-json/internal/errors"
+	"github.com/goccy/go-json/internal/runtime"
+)
+
+// unmarshalTextDecoder decodes values through their encoding.TextUnmarshaler implementation.
+type unmarshalTextDecoder struct {
+	typ        *runtime.Type
+	structName string
+	fieldName  string
+}
+
+// newUnmarshalTextDecoder builds a decoder for typ; the two names are used when annotating errors.
+func newUnmarshalTextDecoder(typ *runtime.Type, structName, fieldName string) *unmarshalTextDecoder {
+	dec := &unmarshalTextDecoder{typ: typ}
+	dec.structName, dec.fieldName = structName, fieldName
+	return dec
+}
+
+func (d *unmarshalTextDecoder) annotateError(cursor int64, err error) {
+	switch e := err.(type) {
+	case *errors.UnmarshalTypeError:
+		e.Struct = d.structName
+		e.Field = d.fieldName
+	case *errors.SyntaxError:
+		e.Offset = cursor
+	}
+}
+
+var (
+	nullbytes = []byte(`null`)
+)
+
+func (d *unmarshalTextDecoder) DecodeStream(s *Stream, depth int64, p unsafe.Pointer) error {
+	s.skipWhiteSpace()
+	start := s.cursor
+	if err := s.skipValue(depth); err != nil {
+		return err
+	}
+	src := s.buf[start:s.cursor]
+	if len(src) > 0 {
+		switch src[0] {
+		case '[':
+			return &errors.UnmarshalTypeError{
+				Value:  "array",
+				Type:   runtime.RType2Type(d.typ),
+				Offset: s.totalOffset(),
+			}
+		case '{':
+			return &errors.UnmarshalTypeError{
+				Value:  "object",
+				Type:   runtime.RType2Type(d.typ),
+				Offset: s.totalOffset(),
+			}
+		case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
+			return &errors.UnmarshalTypeError{
+				Value:  "number",
+				Type:   runtime.RType2Type(d.typ),
+				Offset: s.totalOffset(),
+			}
+		case 'n':
+			if bytes.Equal(src, nullbytes) {
+				*(*unsafe.Pointer)(p) = nil
+				return nil
+			}
+		}
+	}
+	dst := make([]byte, len(src))
+	copy(dst, src)
+
+	if b, ok := unquoteBytes(dst); ok {
+		dst = b
+	}
+	v := *(*interface{})(unsafe.Pointer(&emptyInterface{
+		typ: d.typ,
+		ptr: p,
+	}))
+	if err := v.(encoding.TextUnmarshaler).UnmarshalText(dst); err != nil {
+		d.annotateError(s.cursor, err)
+		return err
+	}
+	return nil
+}
+
+func (d *unmarshalTextDecoder) Decode(ctx *RuntimeContext, cursor, depth int64, p unsafe.Pointer) (int64, error) {
+	buf := ctx.Buf
+	cursor = skipWhiteSpace(buf, cursor)
+	start := cursor
+	end, err := skipValue(buf, cursor, depth)
+	if err != nil {
+		return 0, err
+	}
+	src := buf[start:end]
+	if len(src) > 0 {
+		switch src[0] {
+		case '[':
+			return 0, &errors.UnmarshalTypeError{
+				Value:  "array",
+				Type:   runtime.RType2Type(d.typ),
+				Offset: start,
+			}
+		case '{':
+			return 0, &errors.UnmarshalTypeError{
+				Value:  "object",
+				Type:   runtime.RType2Type(d.typ),
+				Offset: start,
+			}
+		case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
+			return 0, &errors.UnmarshalTypeError{
+				Value:  "number",
+				Type:   runtime.RType2Type(d.typ),
+				Offset: start,
+			}
+		case 'n':
+			if bytes.Equal(src, nullbytes) {
+				*(*unsafe.Pointer)(p) = nil
+				return end, nil
+			}
+		}
+	}
+
+	if s, ok := unquoteBytes(src); ok {
+		src = s
+	}
+	v := *(*interface{})(unsafe.Pointer(&emptyInterface{
+		typ: d.typ,
+		ptr: *(*unsafe.Pointer)(unsafe.Pointer(&p)),
+	}))
+	if err := v.(encoding.TextUnmarshaler).UnmarshalText(src); err != nil {
+		d.annotateError(cursor, err)
+		return 0, err
+	}
+	return end, nil
+}
+
+func (d *unmarshalTextDecoder) DecodePath(ctx *RuntimeContext, cursor, depth int64) ([][]byte, int64, error) { // DecodePath is not implemented for this decoder.
+	return nil, 0, fmt.Errorf("json: unmarshal text decoder does not support decode path") // always fails; every argument is ignored
+}
+
+func unquoteBytes(s []byte) (t []byte, ok bool) { //nolint: nonamedreturns
+	length := len(s) // unquoteBytes turns a quoted JSON string literal into its raw bytes; ok stays false when s is not a valid literal
+	if length < 2 || s[0] != '"' || s[length-1] != '"' {
+		return // naked return: t is nil and ok is false
+	}
+	s = s[1 : length-1]
+	length -= 2
+
+	// Check for unusual characters. If there are none,
+	// then no unquoting is needed, so return a slice of the
+	// original bytes.
+	r := 0
+	for r < length {
+		c := s[r]
+		if c == '\\' || c == '"' || c < ' ' {
+			break
+		}
+		if c < utf8.RuneSelf {
+			r++
+			continue
+		}
+		rr, size := utf8.DecodeRune(s[r:])
+		if rr == utf8.RuneError && size == 1 {
+			break
+		}
+		r += size
+	}
+	if r == length {
+		return s, true // fast path: the result shares memory with the input
+	}
+
+	b := make([]byte, length+2*utf8.UTFMax) // slow path: build the result in a fresh buffer with slack for rune expansion
+	w := copy(b, s[0:r])
+	for r < length {
+		// Out of room? Can only happen if s is full of
+		// malformed UTF-8 and we're replacing each
+		// byte with RuneError.
+		if w >= len(b)-2*utf8.UTFMax {
+			nb := make([]byte, (len(b)+utf8.UTFMax)*2)
+			copy(nb, b[0:w])
+			b = nb
+		}
+		switch c := s[r]; {
+		case c == '\\':
+			r++
+			if r >= length {
+				return
+			}
+			switch s[r] {
+			default:
+				return
+			case '"', '\\', '/', '\'':
+				b[w] = s[r]
+				r++
+				w++
+			case 'b':
+				b[w] = '\b'
+				r++
+				w++
+			case 'f':
+				b[w] = '\f'
+				r++
+				w++
+			case 'n':
+				b[w] = '\n'
+				r++
+				w++
+			case 'r':
+				b[w] = '\r'
+				r++
+				w++
+			case 't':
+				b[w] = '\t'
+				r++
+				w++
+			case 'u':
+				r-- // step back onto the backslash so getu4 sees the whole \uXXXX escape
+				rr := getu4(s[r:])
+				if rr < 0 {
+					return
+				}
+				r += 6 // skip the six bytes of the escape
+				if utf16.IsSurrogate(rr) {
+					rr1 := getu4(s[r:])
+					if dec := utf16.DecodeRune(rr, rr1); dec != unicode.ReplacementChar {
+						// A valid pair; consume.
+						r += 6
+						w += utf8.EncodeRune(b[w:], dec)
+						break // exits the inner switch; the loop continues with the next input byte
+					}
+					// Invalid surrogate; fall back to replacement rune.
+					rr = unicode.ReplacementChar
+				}
+				w += utf8.EncodeRune(b[w:], rr)
+			}
+
+		// Quote, control characters are invalid.
+		case c == '"', c < ' ':
+			return
+
+		// ASCII
+		case c < utf8.RuneSelf:
+			b[w] = c
+			r++
+			w++
+
+		// Coerce to well-formed UTF-8.
+		default:
+			rr, size := utf8.DecodeRune(s[r:])
+			r += size
+			w += utf8.EncodeRune(b[w:], rr)
+		}
+	}
+	return b[0:w], true
+}
+
+// getu4 parses the \uXXXX escape at the start of s and returns its value, or -1 if s is too short or malformed.
+func getu4(s []byte) rune {
+	if len(s) < 6 || s[0] != '\\' || s[1] != 'u' {
+		return -1
+	}
+	var value rune
+	for i := 2; i < 6; i++ {
+		switch ch := rune(s[i]); {
+		case ch >= '0' && ch <= '9':
+			value = value<<4 | (ch - '0')
+		case ch >= 'a' && ch <= 'f':
+			value = value<<4 | (ch - 'a' + 10)
+		case ch >= 'A' && ch <= 'F':
+			value = value<<4 | (ch - 'A' + 10)
+		default:
+			return -1
+		}
+	}
+	return value
+}
diff --git a/vendor/github.com/goccy/go-json/internal/decoder/wrapped_string.go b/vendor/github.com/goccy/go-json/internal/decoder/wrapped_string.go
new file mode 100644
index 00000000..0c4e2e6e
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/internal/decoder/wrapped_string.go
@@ -0,0 +1,73 @@
+package decoder
+
+import (
+	"fmt"
+	"reflect"
+	"unsafe"
+
+	"github.com/goccy/go-json/internal/runtime"
+)
+
+// wrappedStringDecoder decodes a value that is wrapped in a JSON string: the string
+// is decoded first and its contents are then decoded by dec.
+type wrappedStringDecoder struct {
+	typ           *runtime.Type
+	dec           Decoder
+	stringDecoder *stringDecoder
+	structName    string
+	fieldName     string
+	isPtrType     bool
+}
+
+// newWrappedStringDecoder wraps dec for typ. It creates the string decoder used for
+// the outer quotes and records whether typ is a pointer kind.
+func newWrappedStringDecoder(typ *runtime.Type, dec Decoder, structName, fieldName string) *wrappedStringDecoder {
+	d := &wrappedStringDecoder{typ: typ, dec: dec, structName: structName, fieldName: fieldName}
+	d.stringDecoder = newStringDecoder(structName, fieldName)
+	d.isPtrType = typ.Kind() == reflect.Ptr
+	return d
+}
+
+// DecodeStream decodes the outer JSON string from s and runs the wrapped decoder over
+// its contents. A nil string result stores nil at p for pointer types and is otherwise a no-op.
+func (d *wrappedStringDecoder) DecodeStream(s *Stream, depth int64, p unsafe.Pointer) error {
+	raw, err := d.stringDecoder.decodeStreamByte(s)
+	if err != nil {
+		return err
+	}
+	if raw == nil {
+		if d.isPtrType {
+			*(*unsafe.Pointer)(p) = nil
+		}
+		return nil
+	}
+	padded := make([]byte, len(raw)+1) // one extra zero byte at the end, like the nul that Decode appends
+	copy(padded, raw)
+	_, err = d.dec.Decode(&RuntimeContext{Buf: padded}, 0, depth, p)
+	return err
+}
+
+// Decode decodes the outer JSON string at cursor and runs the wrapped decoder over its contents.
+func (d *wrappedStringDecoder) Decode(ctx *RuntimeContext, cursor, depth int64, p unsafe.Pointer) (int64, error) {
+	raw, c, err := d.stringDecoder.decodeByte(ctx.Buf, cursor)
+	if err != nil {
+		return 0, err
+	}
+	if raw == nil {
+		if d.isPtrType {
+			*(*unsafe.Pointer)(p) = nil
+		}
+		return c, nil
+	}
+	oldBuf := ctx.Buf
+	defer func() { ctx.Buf = oldBuf }() // restore on every path; previously an inner error left ctx.Buf swapped
+	ctx.Buf = append(raw, nul)
+	if _, err := d.dec.Decode(ctx, 0, depth, p); err != nil {
+		return 0, err
+	}
+	return c, nil
+}
+
+func (d *wrappedStringDecoder) DecodePath(ctx *RuntimeContext, cursor, depth int64) ([][]byte, int64, error) { // DecodePath is not implemented for this decoder.
+	return nil, 0, fmt.Errorf("json: wrapped string decoder does not support decode path") // always fails; every argument is ignored
+}
diff --git a/vendor/github.com/goccy/go-json/internal/encoder/code.go b/vendor/github.com/goccy/go-json/internal/encoder/code.go
new file mode 100644
index 00000000..5b08faef
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/internal/encoder/code.go
@@ -0,0 +1,1023 @@
+package encoder
+
+import (
+	"fmt"
+	"reflect"
+	"unsafe"
+
+	"github.com/goccy/go-json/internal/runtime"
+)
+
+type Code interface { // Code is the node interface implemented by the *Code types declared below (IntCode, StructCode, PtrCode, ...).
+	Kind() CodeKind                   // the CodeKind constant describing the implementation
+	ToOpcode(*compileContext) Opcodes // emits the opcode sequence for this node
+	Filter(*FieldQuery) Code          // returns the node narrowed by a field query; several implementations return themselves
+}
+
+type AnonymousCode interface { // AnonymousCode is implemented by nodes (StructCode, PtrCode) with a separate opcode form for anonymous fields.
+	ToAnonymousOpcode(*compileContext) Opcodes
+}
+
+type Opcodes []*Opcode // Opcodes is an ordered sequence of opcodes.
+
+// First returns the first opcode, or nil when the sequence is empty.
+func (o Opcodes) First() *Opcode {
+	if len(o) > 0 {
+		return o[0]
+	}
+	return nil
+}
+
+// Last returns the final opcode, or nil when the sequence is empty.
+func (o Opcodes) Last() *Opcode {
+	if n := len(o); n > 0 {
+		return o[n-1]
+	}
+	return nil
+}
+
+func (o Opcodes) Add(codes ...*Opcode) Opcodes { return append(o, codes...) } // Add appends codes and returns the result.
+
+type CodeKind int // CodeKind identifies the category of a Code node; the Kind methods in this file return these values.
+
+const (
+	CodeKindInterface CodeKind = iota // numbering starts at 0 and follows declaration order
+	CodeKindPtr
+	CodeKindInt
+	CodeKindUint
+	CodeKindFloat
+	CodeKindString
+	CodeKindBool
+	CodeKindStruct
+	CodeKindMap
+	CodeKindSlice
+	CodeKindArray
+	CodeKindBytes
+	CodeKindMarshalJSON
+	CodeKindMarshalText
+	CodeKindRecursive
+)
+
+// IntCode describes a signed integer value of bitSize bits. isString selects the
+// string-quoted opcode and isPtr the pointer opcode.
+type IntCode struct {
+	typ      *runtime.Type
+	bitSize  uint8
+	isString bool
+	isPtr    bool
+}
+
+// Kind reports CodeKindInt.
+func (c *IntCode) Kind() CodeKind { return CodeKindInt }
+
+// ToOpcode emits a single opcode carrying the bit size. The pointer form takes
+// precedence over the string form when both flags are set.
+func (c *IntCode) ToOpcode(ctx *compileContext) Opcodes {
+	op := OpInt
+	if c.isPtr {
+		op = OpIntPtr
+	} else if c.isString {
+		op = OpIntString
+	}
+	code := newOpCode(ctx, c.typ, op)
+	code.NumBitSize = c.bitSize
+	ctx.incIndex()
+	return Opcodes{code}
+}
+
+// Filter ignores the query and returns c unchanged.
+func (c *IntCode) Filter(_ *FieldQuery) Code { return c }
+
+// UintCode describes an unsigned integer value of bitSize bits. isString selects
+// the string-quoted opcode and isPtr the pointer opcode.
+type UintCode struct {
+	typ      *runtime.Type
+	bitSize  uint8
+	isString bool
+	isPtr    bool
+}
+
+// Kind reports CodeKindUint.
+func (c *UintCode) Kind() CodeKind { return CodeKindUint }
+
+// ToOpcode emits a single opcode carrying the bit size. The pointer form takes
+// precedence over the string form when both flags are set.
+func (c *UintCode) ToOpcode(ctx *compileContext) Opcodes {
+	op := OpUint
+	if c.isPtr {
+		op = OpUintPtr
+	} else if c.isString {
+		op = OpUintString
+	}
+	code := newOpCode(ctx, c.typ, op)
+	code.NumBitSize = c.bitSize
+	ctx.incIndex()
+	return Opcodes{code}
+}
+
+// Filter ignores the query and returns c unchanged.
+func (c *UintCode) Filter(_ *FieldQuery) Code { return c }
+
+// FloatCode describes a floating-point value. A bitSize of 32 selects the float32
+// opcodes; every other bitSize selects the float64 opcodes.
+type FloatCode struct {
+	typ     *runtime.Type
+	bitSize uint8
+	isPtr   bool
+}
+
+// Kind reports CodeKindFloat.
+func (c *FloatCode) Kind() CodeKind {
+	return CodeKindFloat
+}
+
+// ToOpcode emits the single opcode matching the pointer flag and the bit size.
+func (c *FloatCode) ToOpcode(ctx *compileContext) Opcodes {
+	var op OpType
+	switch {
+	case c.isPtr && c.bitSize == 32:
+		op = OpFloat32Ptr
+	case c.isPtr:
+		op = OpFloat64Ptr
+	case c.bitSize == 32:
+		op = OpFloat32
+	default:
+		op = OpFloat64
+	}
+	code := newOpCode(ctx, c.typ, op)
+	ctx.incIndex()
+	return Opcodes{code}
+}
+
+// Filter ignores the query and returns c unchanged.
+func (c *FloatCode) Filter(_ *FieldQuery) Code {
+	return c
+}
+
+// StringCode describes a string value. The same node is used for the JSON number
+// type, which is detected by comparing typ with jsonNumberType.
+type StringCode struct {
+	typ   *runtime.Type
+	isPtr bool
+}
+
+// Kind reports CodeKindString.
+func (c *StringCode) Kind() CodeKind {
+	return CodeKindString
+}
+
+// ToOpcode emits a single opcode. The value/pointer pair is OpString/OpStringPtr,
+// or OpNumber/OpNumberPtr when typ is the JSON number type; isPtr then picks the
+// pointer member of the pair.
+func (c *StringCode) ToOpcode(ctx *compileContext) Opcodes {
+	op, ptrOp := OpString, OpStringPtr
+	if c.typ == runtime.Type2RType(jsonNumberType) {
+		op, ptrOp = OpNumber, OpNumberPtr
+	}
+	if c.isPtr {
+		op = ptrOp
+	}
+	code := newOpCode(ctx, c.typ, op)
+	ctx.incIndex()
+	return Opcodes{code}
+}
+
+// Filter ignores the query and returns c unchanged.
+func (c *StringCode) Filter(_ *FieldQuery) Code {
+	return c
+}
+
+// BoolCode describes a boolean value; isPtr selects the pointer opcode.
+type BoolCode struct {
+	typ   *runtime.Type
+	isPtr bool
+}
+
+// Kind reports CodeKindBool.
+func (c *BoolCode) Kind() CodeKind { return CodeKindBool }
+
+// ToOpcode emits a single OpBool or OpBoolPtr opcode.
+func (c *BoolCode) ToOpcode(ctx *compileContext) Opcodes {
+	op := OpBool
+	if c.isPtr {
+		op = OpBoolPtr
+	}
+	code := newOpCode(ctx, c.typ, op)
+	ctx.incIndex()
+	return Opcodes{code}
+}
+
+// Filter ignores the query and returns c unchanged.
+func (c *BoolCode) Filter(_ *FieldQuery) Code {
+	return c
+}
+
+// BytesCode describes a byte-slice value; isPtr selects the pointer opcode.
+type BytesCode struct {
+	typ   *runtime.Type
+	isPtr bool
+}
+
+// Kind reports CodeKindBytes.
+func (c *BytesCode) Kind() CodeKind { return CodeKindBytes }
+
+// ToOpcode emits a single OpBytes or OpBytesPtr opcode.
+func (c *BytesCode) ToOpcode(ctx *compileContext) Opcodes {
+	op := OpBytes
+	if c.isPtr {
+		op = OpBytesPtr
+	}
+	code := newOpCode(ctx, c.typ, op)
+	ctx.incIndex()
+	return Opcodes{code}
+}
+
+// Filter ignores the query and returns c unchanged.
+func (c *BytesCode) Filter(_ *FieldQuery) Code {
+	return c
+}
+
+type SliceCode struct { // SliceCode describes a slice type together with the Code of its element.
+	typ   *runtime.Type
+	value Code
+}
+
+func (c *SliceCode) Kind() CodeKind {
+	return CodeKindSlice
+}
+
+func (c *SliceCode) ToOpcode(ctx *compileContext) Opcodes {
+	// header => opcode => elem => end
+	//             ^        |
+	//             |________|
+	size := c.typ.Elem().Size()
+	header := newSliceHeaderCode(ctx, c.typ)
+	ctx.incIndex()
+
+	ctx.incIndent()
+	codes := c.value.ToOpcode(ctx) // element opcodes are compiled one indent level deeper
+	ctx.decIndent()
+
+	codes.First().Flags |= IndirectFlags
+	elemCode := newSliceElemCode(ctx, c.typ.Elem(), header, size)
+	ctx.incIndex()
+	end := newOpCode(ctx, c.typ, OpSliceEnd)
+	ctx.incIndex()
+	header.End = end
+	header.Next = codes.First()
+	codes.Last().Next = elemCode
+	elemCode.Next = codes.First() // loop edge: after the elem opcode, control returns to the first element opcode
+	elemCode.End = end
+	return Opcodes{header}.Add(codes...).Add(elemCode).Add(end)
+}
+
+func (c *SliceCode) Filter(_ *FieldQuery) Code { // the query is ignored; c is returned unchanged
+	return c
+}
+
+type ArrayCode struct { // ArrayCode describes an array type together with the Code of its element.
+	typ   *runtime.Type
+	value Code
+}
+
+func (c *ArrayCode) Kind() CodeKind {
+	return CodeKindArray
+}
+
+func (c *ArrayCode) ToOpcode(ctx *compileContext) Opcodes {
+	// header => opcode => elem => end
+	//             ^        |
+	//             |________|
+	elem := c.typ.Elem()
+	alen := c.typ.Len()
+	size := elem.Size()
+
+	header := newArrayHeaderCode(ctx, c.typ, alen)
+	ctx.incIndex()
+
+	ctx.incIndent()
+	codes := c.value.ToOpcode(ctx) // element opcodes are compiled one indent level deeper
+	ctx.decIndent()
+
+	codes.First().Flags |= IndirectFlags
+
+	elemCode := newArrayElemCode(ctx, elem, header, alen, size)
+	ctx.incIndex()
+
+	end := newOpCode(ctx, c.typ, OpArrayEnd)
+	ctx.incIndex()
+
+	header.End = end
+	header.Next = codes.First()
+	codes.Last().Next = elemCode
+	elemCode.Next = codes.First() // loop edge: after the elem opcode, control returns to the first element opcode
+	elemCode.End = end
+
+	return Opcodes{header}.Add(codes...).Add(elemCode).Add(end)
+}
+
+func (c *ArrayCode) Filter(_ *FieldQuery) Code { // the query is ignored; c is returned unchanged
+	return c
+}
+
+type MapCode struct { // MapCode describes a map type together with the Codes of its key and value.
+	typ   *runtime.Type
+	key   Code
+	value Code
+}
+
+func (c *MapCode) Kind() CodeKind {
+	return CodeKindMap
+}
+
+func (c *MapCode) ToOpcode(ctx *compileContext) Opcodes {
+	// header => code => value => code => key => code => value => code => end
+	//                                     ^                       |
+	//                                     |_______________________|
+	header := newMapHeaderCode(ctx, c.typ)
+	ctx.incIndex()
+
+	keyCodes := c.key.ToOpcode(ctx)
+
+	value := newMapValueCode(ctx, c.typ.Elem(), header)
+	ctx.incIndex()
+
+	ctx.incIndent()
+	valueCodes := c.value.ToOpcode(ctx) // value opcodes are compiled one indent level deeper
+	ctx.decIndent()
+
+	valueCodes.First().Flags |= IndirectFlags
+
+	key := newMapKeyCode(ctx, c.typ.Key(), header)
+	ctx.incIndex()
+
+	end := newMapEndCode(ctx, c.typ, header)
+	ctx.incIndex()
+
+	header.Next = keyCodes.First()
+	keyCodes.Last().Next = value
+	value.Next = valueCodes.First()
+	valueCodes.Last().Next = key
+	key.Next = keyCodes.First() // loop edge: after the key opcode, control returns to the first key opcode
+
+	header.End = end
+	key.End = end
+	value.End = end
+	return Opcodes{header}.Add(keyCodes...).Add(value).Add(valueCodes...).Add(key).Add(end)
+}
+
+func (c *MapCode) Filter(_ *FieldQuery) Code { // the query is ignored; c is returned unchanged
+	return c
+}
+
+// StructCode describes a struct type and the codes of its fields.
+type StructCode struct {
+	typ                       *runtime.Type
+	fields                    []*StructFieldCode
+	isPtr                     bool
+	disableIndirectConversion bool
+	isIndirect                bool
+	isRecursive               bool
+}
+
+// Kind reports CodeKindStruct.
+func (c *StructCode) Kind() CodeKind { return CodeKindStruct }
+
+// lastFieldCode returns the end of the NextField chain that starts at firstField; embedded structs use lastAnonymousFieldCode.
+func (c *StructCode) lastFieldCode(field *StructFieldCode, firstField *Opcode) *Opcode {
+	if isEmbeddedStruct(field) {
+		return c.lastAnonymousFieldCode(firstField)
+	}
+	tail := firstField
+	for ; tail.NextField != nil; tail = tail.NextField {
+	}
+	return tail
+}
+
+// lastAnonymousFieldCode returns the end of the NextField chain of an anonymous struct.
+func (c *StructCode) lastAnonymousFieldCode(firstField *Opcode) *Opcode {
+	// Skip the plain StructHead/StructField wrappers first; the real head follows them.
+	cur := firstField
+	for cur.Op == OpStructHead || cur.Op == OpStructField {
+		cur = cur.Next
+	}
+	for cur.NextField != nil {
+		cur = cur.NextField
+	}
+	return cur
+}
+
+func (c *StructCode) ToOpcode(ctx *compileContext) Opcodes {
+	// header => code => structField => code => end
+	//                        ^          |
+	//                        |__________|
+	if c.isRecursive { // recursive structs become one recursive opcode that is also recorded in ctx.recursiveCodes
+		recursive := newRecursiveCode(ctx, c.typ, &CompiledCode{})
+		recursive.Type = c.typ
+		ctx.incIndex()
+		*ctx.recursiveCodes = append(*ctx.recursiveCodes, recursive)
+		return Opcodes{recursive}
+	}
+	codes := Opcodes{}
+	var prevField *Opcode
+	ctx.incIndent()
+	for idx, field := range c.fields {
+		isFirstField := idx == 0
+		isEndField := idx == len(c.fields)-1
+		fieldCodes := field.ToOpcode(ctx, isFirstField, isEndField)
+		for _, code := range fieldCodes {
+			if c.isIndirect {
+				code.Flags |= IndirectFlags
+			}
+		}
+		firstField := fieldCodes.First()
+		if len(codes) > 0 {
+			codes.Last().Next = firstField
+			firstField.Idx = codes.First().Idx // later fields reuse the Idx of the struct's first opcode
+		}
+		if prevField != nil {
+			prevField.NextField = firstField
+		}
+		if isEndField {
+			endField := fieldCodes.Last()
+			if len(codes) > 0 {
+				codes.First().End = endField
+			} else {
+				firstField.End = endField
+			}
+			codes = codes.Add(fieldCodes...)
+			break
+		}
+		prevField = c.lastFieldCode(field, firstField)
+		codes = codes.Add(fieldCodes...)
+	}
+	if len(codes) == 0 { // no fields at all: emit a bare StructHead/StructEnd pair
+		head := &Opcode{
+			Op:         OpStructHead,
+			Idx:        opcodeOffset(ctx.ptrIndex),
+			Type:       c.typ,
+			DisplayIdx: ctx.opcodeIndex,
+			Indent:     ctx.indent,
+		}
+		ctx.incOpcodeIndex()
+		end := &Opcode{
+			Op:         OpStructEnd,
+			Idx:        opcodeOffset(ctx.ptrIndex),
+			DisplayIdx: ctx.opcodeIndex,
+			Indent:     ctx.indent,
+		}
+		head.NextField = end
+		head.Next = end
+		head.End = end
+		codes = codes.Add(head, end)
+		ctx.incIndex()
+	}
+	ctx.decIndent()
+	ctx.structTypeToCodes[uintptr(unsafe.Pointer(c.typ))] = codes // the result is stored per struct type, keyed by the type pointer
+	return codes
+}
+
+func (c *StructCode) ToAnonymousOpcode(ctx *compileContext) Opcodes {
+	// header => code => structField => code => end
+	//                        ^          |
+	//                        |__________|
+	if c.isRecursive { // recursive structs become one recursive opcode that is also recorded in ctx.recursiveCodes
+		recursive := newRecursiveCode(ctx, c.typ, &CompiledCode{})
+		recursive.Type = c.typ
+		ctx.incIndex()
+		*ctx.recursiveCodes = append(*ctx.recursiveCodes, recursive)
+		return Opcodes{recursive}
+	}
+	codes := Opcodes{}
+	var prevField *Opcode
+	for idx, field := range c.fields {
+		isFirstField := idx == 0
+		isEndField := idx == len(c.fields)-1
+		fieldCodes := field.ToAnonymousOpcode(ctx, isFirstField, isEndField)
+		for _, code := range fieldCodes {
+			if c.isIndirect {
+				code.Flags |= IndirectFlags
+			}
+		}
+		firstField := fieldCodes.First()
+		if len(codes) > 0 {
+			codes.Last().Next = firstField
+			firstField.Idx = codes.First().Idx // later fields reuse the Idx of the first opcode
+		}
+		if prevField != nil {
+			prevField.NextField = firstField
+		}
+		if isEndField {
+			lastField := fieldCodes.Last()
+			if len(codes) > 0 {
+				codes.First().End = lastField
+			} else {
+				firstField.End = lastField
+			}
+		}
+		prevField = firstField // unlike ToOpcode, the chain continues from the field's first opcode and no end opcode is added here
+		codes = codes.Add(fieldCodes...)
+	}
+	return codes
+}
+
+// removeFieldsByTags drops, in place, every field whose key exists in tags.
+// Anonymous non-recursive struct fields are pruned recursively instead and are
+// kept only while at least one of their own fields survives; their own key is
+// not checked against tags.
+func (c *StructCode) removeFieldsByTags(tags runtime.StructTags) {
+	kept := make([]*StructFieldCode, 0, len(c.fields))
+	for _, f := range c.fields {
+		if embedded := f.getAnonymousStruct(); embedded != nil && !embedded.isRecursive {
+			embedded.removeFieldsByTags(tags)
+			if len(embedded.fields) > 0 {
+				kept = append(kept, f)
+			}
+			continue
+		}
+		if !tags.ExistsKey(f.key) {
+			kept = append(kept, f)
+		}
+	}
+	c.fields = kept
+}
+
+// enableIndirect marks c as indirect and propagates the mark down through the
+// struct (if any) held by the first field. It returns immediately when c is
+// already marked.
+func (c *StructCode) enableIndirect() {
+	if c.isIndirect {
+		return
+	}
+	c.isIndirect = true
+	if len(c.fields) > 0 {
+		if first := c.fields[0].getStruct(); first != nil {
+			first.enableIndirect()
+		}
+	}
+}
+
+// Filter returns a new StructCode that keeps only the fields named in
+// query.Fields, in the order they appear in c.fields. When a selected field
+// carries a nested query, the field's value Code is filtered with it as well.
+// Fields are copied, so neither c nor its StructFieldCode values are modified;
+// the value Codes themselves are shared unless they were filtered.
+func (c *StructCode) Filter(query *FieldQuery) Code {
+	// Index the requested sub-queries by field name.
+	wanted := make(map[string]*FieldQuery, len(query.Fields))
+	for _, sub := range query.Fields {
+		wanted[sub.Name] = sub
+	}
+	fields := make([]*StructFieldCode, 0, len(c.fields))
+	for _, field := range c.fields {
+		sub, exists := wanted[field.key]
+		if !exists {
+			continue
+		}
+		// Copy the whole field rather than listing attributes one by one: the
+		// hand-written list omitted isMarshalerContext, so a filtered field lost
+		// the MarshalerContextFlags bit that flags() derives from it.
+		fieldCode := *field
+		if len(sub.Fields) > 0 {
+			// Only descend when the sub-query selects nested fields.
+			fieldCode.value = fieldCode.value.Filter(sub)
+		}
+		fields = append(fields, &fieldCode)
+	}
+	// The struct-level attributes are carried over unchanged; only the field
+	// list differs from c.
+	return &StructCode{
+		typ:                       c.typ,
+		fields:                    fields,
+		isPtr:                     c.isPtr,
+		disableIndirectConversion: c.disableIndirectConversion,
+		isIndirect:                c.isIndirect,
+		isRecursive:               c.isRecursive,
+	}
+}
+
+type StructFieldCode struct { // StructFieldCode describes one field of a StructCode.
+	typ                *runtime.Type
+	key                string // field name as written into the output key (see structKey)
+	tag                *runtime.StructTag
+	value              Code    // Code of the field's value
+	offset             uintptr // copied into Opcode.Offset as a uint32
+	isAnonymous        bool    // the is* booleans below are turned into opcode flag bits by flags()
+	isTaggedKey        bool
+	isNilableType      bool
+	isNilCheck         bool
+	isAddrForMarshaler bool
+	isNextOpPtrType    bool
+	isMarshalerContext bool
+}
+
+// getStruct returns the StructCode held by this field, looking through one
+// PtrCode layer, or nil when the value is not a struct.
+func (c *StructFieldCode) getStruct() *StructCode {
+	inner := c.value
+	if ptr, isPtr := inner.(*PtrCode); isPtr {
+		inner = ptr.value
+	}
+	structCode, _ := inner.(*StructCode)
+	return structCode
+}
+
+// getAnonymousStruct is getStruct restricted to anonymous fields; it returns
+// nil for every named field.
+func (c *StructFieldCode) getAnonymousStruct() *StructCode {
+	if c.isAnonymous {
+		return c.getStruct()
+	}
+	return nil
+}
+
+// optimizeStructHeader returns code's header op type, converted to its omitempty form when the tag asks for it.
+func optimizeStructHeader(code *Opcode, tag *runtime.StructTag) OpType {
+	if !tag.IsOmitEmpty {
+		return code.ToHeaderType(tag.IsString)
+	}
+	return code.ToHeaderType(tag.IsString).HeadToOmitEmptyHead()
+}
+
+// optimizeStructField returns code's field op type, converted to its omitempty form when the tag asks for it.
+func optimizeStructField(code *Opcode, tag *runtime.StructTag) OpType {
+	if !tag.IsOmitEmpty {
+		return code.ToFieldType(tag.IsString)
+	}
+	return code.ToFieldType(tag.IsString).FieldToOmitEmptyField()
+}
+
+func (c *StructFieldCode) headerOpcodes(ctx *compileContext, field *Opcode, valueCodes Opcodes) Opcodes { // builds the opcodes for a struct's first field
+	value := valueCodes.First()
+	op := optimizeStructHeader(value, c.tag) // head op derived from the first value opcode and the field's tag
+	field.Op = op
+	if value.Flags&MarshalerContextFlags != 0 {
+		field.Flags |= MarshalerContextFlags
+	}
+	field.NumBitSize = value.NumBitSize
+	field.PtrNum = value.PtrNum
+	field.FieldQuery = value.FieldQuery
+	fieldCodes := Opcodes{field}
+	if op.IsMultipleOpHead() { // the value keeps its own opcodes after the head
+		field.Next = value
+		fieldCodes = fieldCodes.Add(valueCodes...)
+	} else {
+		ctx.decIndex() // value opcodes are not emitted; presumably this gives back the index they consumed
+	}
+	return fieldCodes
+}
+
+func (c *StructFieldCode) fieldOpcodes(ctx *compileContext, field *Opcode, valueCodes Opcodes) Opcodes { // builds the opcodes for a non-first struct field
+	value := valueCodes.First()
+	op := optimizeStructField(value, c.tag) // field op derived from the first value opcode and the field's tag
+	field.Op = op
+	if value.Flags&MarshalerContextFlags != 0 {
+		field.Flags |= MarshalerContextFlags
+	}
+	field.NumBitSize = value.NumBitSize
+	field.PtrNum = value.PtrNum
+	field.FieldQuery = value.FieldQuery
+
+	fieldCodes := Opcodes{field}
+	if op.IsMultipleOpField() { // the value keeps its own opcodes after the field opcode
+		field.Next = value
+		fieldCodes = fieldCodes.Add(valueCodes...)
+	} else {
+		ctx.decIndex() // value opcodes are not emitted; presumably this gives back the index they consumed
+	}
+	return fieldCodes
+}
+
+func (c *StructFieldCode) addStructEndCode(ctx *compileContext, codes Opcodes) Opcodes { // appends an OpStructEnd opcode to codes and links it in
+	end := &Opcode{
+		Op:         OpStructEnd,
+		Idx:        opcodeOffset(ctx.ptrIndex),
+		DisplayIdx: ctx.opcodeIndex,
+		Indent:     ctx.indent,
+	}
+	codes.Last().Next = end // sequential link from the current last opcode
+	code := codes.First()
+	for code.Op == OpStructField || code.Op == OpStructHead { // skip plain head/field opcodes via Next
+		code = code.Next
+	}
+	for code.NextField != nil { // then walk to the end of the NextField chain
+		code = code.NextField
+	}
+	code.NextField = end
+
+	codes = codes.Add(end)
+	ctx.incOpcodeIndex()
+	return codes
+}
+
+// structKey returns the field key followed by a colon; with ctx.escapeKey the key is quoted by AppendString under HTMLEscapeOption.
+func (c *StructFieldCode) structKey(ctx *compileContext) string {
+	if !ctx.escapeKey {
+		return fmt.Sprintf(`"%s":`, c.key)
+	}
+	return fmt.Sprintf(`%s:`, string(AppendString(&RuntimeContext{Option: &Option{Flag: HTMLEscapeOption}}, []byte{}, c.key)))
+}
+
+// flags converts the field's boolean attributes into the matching OpFlags bits.
+func (c *StructFieldCode) flags() OpFlags {
+	// Each entry pairs one attribute with the bit it contributes.
+	table := [...]struct {
+		on  bool
+		bit OpFlags
+	}{
+		{c.isTaggedKey, IsTaggedKeyFlags},
+		{c.isNilableType, IsNilableTypeFlags},
+		{c.isNilCheck, NilCheckFlags},
+		{c.isAddrForMarshaler, AddrForMarshalerFlags},
+		{c.isNextOpPtrType, IsNextOpPtrTypeFlags},
+		{c.isAnonymous, AnonymousKeyFlags},
+		{c.isMarshalerContext, MarshalerContextFlags},
+	}
+	// Start from the zero value and OR in the bit of every attribute that is set.
+	var flags OpFlags
+	for _, entry := range table {
+		if entry.on {
+			// Bits are only ever added, never cleared.
+			flags |= entry.bit
+		}
+	}
+	return flags
+}
+
+// toValueOpcodes compiles the field's value. Anonymous fields whose value
+// implements AnonymousCode use the anonymous opcode form; everything else
+// uses the regular ToOpcode.
+func (c *StructFieldCode) toValueOpcodes(ctx *compileContext) Opcodes {
+	if anonymCode, ok := c.value.(AnonymousCode); ok && c.isAnonymous {
+		return anonymCode.ToAnonymousOpcode(ctx)
+	}
+	return c.value.ToOpcode(ctx)
+}
+
+func (c *StructFieldCode) ToOpcode(ctx *compileContext, isFirstField, isEndField bool) Opcodes { // compiles one struct field, as head or as field depending on isFirstField
+	field := &Opcode{
+		Idx:        opcodeOffset(ctx.ptrIndex),
+		Flags:      c.flags(),
+		Key:        c.structKey(ctx),
+		Offset:     uint32(c.offset),
+		Type:       c.typ,
+		DisplayIdx: ctx.opcodeIndex,
+		Indent:     ctx.indent,
+		DisplayKey: c.key,
+	}
+	ctx.incIndex()
+	valueCodes := c.toValueOpcodes(ctx)
+	if isFirstField {
+		codes := c.headerOpcodes(ctx, field, valueCodes)
+		if isEndField { // single-field struct: the head is also the last field, so the end opcode follows it
+			codes = c.addStructEndCode(ctx, codes)
+		}
+		return codes
+	}
+	codes := c.fieldOpcodes(ctx, field, valueCodes)
+	if isEndField {
+		if isEnableStructEndOptimization(c.value) { // eligible last field: switch the field op to its end form instead of adding OpStructEnd
+			field.Op = field.Op.FieldToEnd()
+		} else {
+			codes = c.addStructEndCode(ctx, codes)
+		}
+	}
+	return codes
+}
+
+func (c *StructFieldCode) ToAnonymousOpcode(ctx *compileContext, isFirstField, isEndField bool) Opcodes { // like ToOpcode, but sets AnonymousHeadFlags and never adds an end opcode
+	field := &Opcode{
+		Idx:        opcodeOffset(ctx.ptrIndex),
+		Flags:      c.flags() | AnonymousHeadFlags,
+		Key:        c.structKey(ctx),
+		Offset:     uint32(c.offset),
+		Type:       c.typ,
+		DisplayIdx: ctx.opcodeIndex,
+		Indent:     ctx.indent,
+		DisplayKey: c.key,
+	}
+	ctx.incIndex()
+	valueCodes := c.toValueOpcodes(ctx)
+	if isFirstField {
+		return c.headerOpcodes(ctx, field, valueCodes)
+	}
+	return c.fieldOpcodes(ctx, field, valueCodes) // isEndField is not consulted in this method
+}
+
+// isEnableStructEndOptimization reports whether value is an int, uint, float,
+// string, bool or bytes code, possibly behind any number of pointer layers.
+// StructFieldCode.ToOpcode uses the answer to decide between FieldToEnd and a
+// separate struct-end opcode.
+func isEnableStructEndOptimization(value Code) bool {
+	// Peel pointer layers one at a time until a non-pointer code remains.
+	for value.Kind() == CodeKindPtr {
+		value = value.(*PtrCode).value
+	}
+	switch value.Kind() {
+	case CodeKindInt, CodeKindUint, CodeKindFloat, CodeKindString, CodeKindBool, CodeKindBytes:
+		return true
+	}
+	return false
+}
+
+// InterfaceCode describes an interface-typed value. fieldQuery is copied onto the
+// emitted opcode and isPtr selects the pointer opcode.
+type InterfaceCode struct {
+	typ        *runtime.Type
+	fieldQuery *FieldQuery
+	isPtr      bool
+}
+
+// Kind reports CodeKindInterface.
+func (c *InterfaceCode) Kind() CodeKind { return CodeKindInterface }
+
+// ToOpcode emits a single OpInterface or OpInterfacePtr opcode. Interface types
+// that declare at least one method are flagged with NonEmptyInterfaceFlags.
+func (c *InterfaceCode) ToOpcode(ctx *compileContext) Opcodes {
+	op := OpInterface
+	if c.isPtr {
+		op = OpInterfacePtr
+	}
+	code := newOpCode(ctx, c.typ, op)
+	code.FieldQuery = c.fieldQuery
+	if c.typ.NumMethod() > 0 {
+		code.Flags |= NonEmptyInterfaceFlags
+	}
+	ctx.incIndex()
+	return Opcodes{code}
+}
+
+// Filter returns a copy of c whose fieldQuery is replaced by query.
+func (c *InterfaceCode) Filter(query *FieldQuery) Code {
+	filtered := *c
+	filtered.fieldQuery = query
+	return &filtered
+}
+
+// MarshalJSONCode describes a value that is encoded with the OpMarshalJSON opcode.
+// The boolean attributes are translated into opcode flags by ToOpcode.
+type MarshalJSONCode struct {
+	typ                *runtime.Type
+	fieldQuery         *FieldQuery
+	isAddrForMarshaler bool
+	isNilableType      bool
+	isMarshalerContext bool
+}
+
+// Kind reports CodeKindMarshalJSON.
+func (c *MarshalJSONCode) Kind() CodeKind { return CodeKindMarshalJSON }
+
+// ToOpcode emits a single OpMarshalJSON opcode. The nilable bit is handled last
+// and is explicitly cleared when isNilableType is false.
+func (c *MarshalJSONCode) ToOpcode(ctx *compileContext) Opcodes {
+	code := newOpCode(ctx, c.typ, OpMarshalJSON)
+	code.FieldQuery = c.fieldQuery
+	if c.isAddrForMarshaler {
+		code.Flags |= AddrForMarshalerFlags
+	}
+	if c.isMarshalerContext {
+		code.Flags |= MarshalerContextFlags
+	}
+	code.Flags &^= IsNilableTypeFlags
+	if c.isNilableType {
+		code.Flags |= IsNilableTypeFlags
+	}
+	ctx.incIndex()
+	return Opcodes{code}
+}
+
+// Filter returns a copy of c whose fieldQuery is replaced by query; every other
+// attribute is carried over.
+func (c *MarshalJSONCode) Filter(query *FieldQuery) Code {
+	filtered := *c
+	filtered.fieldQuery = query
+	return &filtered
+}
+
+// MarshalTextCode describes a value that is encoded with the OpMarshalText opcode.
+// The boolean attributes are translated into opcode flags by ToOpcode.
+type MarshalTextCode struct {
+	typ                *runtime.Type
+	fieldQuery         *FieldQuery
+	isAddrForMarshaler bool
+	isNilableType      bool
+}
+
+// Kind reports CodeKindMarshalText.
+func (c *MarshalTextCode) Kind() CodeKind { return CodeKindMarshalText }
+
+// ToOpcode emits a single OpMarshalText opcode. The nilable bit is handled last
+// and is explicitly cleared when isNilableType is false.
+func (c *MarshalTextCode) ToOpcode(ctx *compileContext) Opcodes {
+	code := newOpCode(ctx, c.typ, OpMarshalText)
+	code.FieldQuery = c.fieldQuery
+	if c.isAddrForMarshaler {
+		code.Flags |= AddrForMarshalerFlags
+	}
+	code.Flags &^= IsNilableTypeFlags
+	if c.isNilableType {
+		code.Flags |= IsNilableTypeFlags
+	}
+	ctx.incIndex()
+	return Opcodes{code}
+}
+
+// Filter returns a copy of c whose fieldQuery is replaced by query.
+func (c *MarshalTextCode) Filter(query *FieldQuery) Code {
+	filtered := *c
+	filtered.fieldQuery = query
+	return &filtered
+}
+
+// PtrCode wraps the Code of a pointed-to value. ptrNum is written to the PtrNum
+// field of the first opcode that the wrapped value emits.
+type PtrCode struct {
+	typ    *runtime.Type
+	value  Code
+	ptrNum uint8
+}
+
+// Kind reports CodeKindPtr.
+func (c *PtrCode) Kind() CodeKind { return CodeKindPtr }
+
+// ToOpcode compiles the wrapped value and converts its first opcode to the pointer
+// form chosen by convertPtrOp before storing ptrNum on it.
+func (c *PtrCode) ToOpcode(ctx *compileContext) Opcodes {
+	codes := c.value.ToOpcode(ctx)
+	head := codes.First()
+	head.Op = convertPtrOp(head)
+	head.PtrNum = c.ptrNum
+	return codes
+}
+
+// ToAnonymousOpcode is ToOpcode for anonymous fields: it prefers the wrapped value's anonymous form.
+func (c *PtrCode) ToAnonymousOpcode(ctx *compileContext) Opcodes {
+	var codes Opcodes
+	if anonymCode, ok := c.value.(AnonymousCode); ok {
+		codes = anonymCode.ToAnonymousOpcode(ctx)
+	} else {
+		codes = c.value.ToOpcode(ctx)
+	}
+	head := codes.First()
+	head.Op = convertPtrOp(head)
+	head.PtrNum = c.ptrNum
+	return codes
+}
+
+// Filter returns a new PtrCode around the filtered value.
+func (c *PtrCode) Filter(query *FieldQuery) Code {
+	return &PtrCode{typ: c.typ, value: c.value.Filter(query), ptrNum: c.ptrNum}
+}
+
+// convertPtrOp returns the pointer form of code.Op, or code.Op itself when no pointer form is listed.
+func convertPtrOp(code *Opcode) OpType {
+	if ptrHeadOp := code.Op.HeadToPtrHead(); ptrHeadOp != code.Op {
+		if code.PtrNum > 0 {
+			code.PtrNum-- // ptr field and ptr head
+		}
+		return ptrHeadOp
+	}
+	ptrOp := code.Op
+	switch ptrOp {
+	case OpInt:
+		ptrOp = OpIntPtr
+	case OpUint:
+		ptrOp = OpUintPtr
+	case OpFloat32:
+		ptrOp = OpFloat32Ptr
+	case OpFloat64:
+		ptrOp = OpFloat64Ptr
+	case OpString:
+		ptrOp = OpStringPtr
+	case OpBool:
+		ptrOp = OpBoolPtr
+	case OpBytes:
+		ptrOp = OpBytesPtr
+	case OpNumber:
+		ptrOp = OpNumberPtr
+	case OpArray:
+		ptrOp = OpArrayPtr
+	case OpSlice:
+		ptrOp = OpSlicePtr
+	case OpMap:
+		ptrOp = OpMapPtr
+	case OpMarshalJSON:
+		ptrOp = OpMarshalJSONPtr
+	case OpMarshalText:
+		ptrOp = OpMarshalTextPtr
+	case OpInterface:
+		ptrOp = OpInterfacePtr
+	case OpRecursive:
+		ptrOp = OpRecursivePtr
+	}
+	return ptrOp
+}
+
+// isEmbeddedStruct reports whether field is anonymous and its type is a struct
+// or a pointer to a struct. The type is only inspected for anonymous fields,
+// and only one pointer level is looked through.
+func isEmbeddedStruct(field *StructFieldCode) bool {
+	typ := field.typ
+	if field.isAnonymous && typ.Kind() == reflect.Ptr {
+		typ = typ.Elem()
+	}
+	return field.isAnonymous && typ.Kind() == reflect.Struct
+}
diff --git a/vendor/github.com/goccy/go-json/internal/encoder/compact.go b/vendor/github.com/goccy/go-json/internal/encoder/compact.go
new file mode 100644
index 00000000..e287a6c0
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/internal/encoder/compact.go
@@ -0,0 +1,286 @@
+package encoder
+
+import (
+	"bytes"
+	"fmt"
+	"strconv"
+	"unsafe"
+
+	"github.com/goccy/go-json/internal/errors"
+)
+
+var (
+	isWhiteSpace = [256]bool{ // byte-indexed table: true for the four whitespace bytes skipped between tokens
+		' ':  true,
+		'\n': true,
+		'\t': true,
+		'\r': true,
+	}
+	isHTMLEscapeChar = [256]bool{ // byte-indexed table: bytes compactString rewrites as \u00XX when escape is set
+		'<': true,
+		'>': true,
+		'&': true,
+	}
+	nul = byte('\000') // sentinel byte that Compact appends after the input so scanners can detect the end
+)
+
+// Compact appends the compacted form of the JSON document src to buf. When
+// escape is set, '<', '>', '&', U+2028 and U+2029 inside strings are written
+// as \u escapes. An error is returned for empty or malformed input, in which
+// case nothing is written to buf.
+func Compact(buf *bytes.Buffer, src []byte, escape bool) error {
+	if len(src) == 0 {
+		return errors.ErrUnexpectedEndOfJSON("", 0)
+	}
+	buf.Grow(len(src))
+	// Work in buf's spare capacity only. Starting from buf.Bytes() itself would
+	// make compactAndWrite append the existing contents a second time.
+	dst := buf.Bytes()[buf.Len():]
+
+	ctx := TakeRuntimeContext()
+	defer ReleaseRuntimeContext(ctx)
+	// Parse a nul-terminated copy of src; the scanners stop at the sentinel.
+	ctx.Buf = append(append(ctx.Buf[:0], src...), nul)
+	return compactAndWrite(buf, dst, ctx.Buf, escape)
+}
+
+// compactAndWrite compacts src into dst and, only if that succeeds, appends
+// the result to buf; when compaction fails buf.Write is never reached.
+func compactAndWrite(buf *bytes.Buffer, dst []byte, src []byte, escape bool) error {
+	out, err := compact(dst, src, escape)
+	if err != nil {
+		return err
+	}
+	_, err = buf.Write(out)
+	return err
+}
+
+func compact(dst, src []byte, escape bool) ([]byte, error) { // one top-level value, then only whitespace up to the nul sentinel
+	out, end, err := compactValue(dst, src, 0, escape)
+	if err == nil {
+		err = validateEndBuf(src, end)
+	}
+	if err != nil {
+		return nil, err
+	}
+	return out, nil
+}
+
+// validateEndBuf checks that only whitespace separates cursor from the nul
+// sentinel; any other byte is reported as a syntax error at offset cursor+1.
+func validateEndBuf(src []byte, cursor int64) error {
+	for ; ; cursor++ {
+		switch c := src[cursor]; c {
+		case ' ', '\t', '\n', '\r':
+			// trailing whitespace: keep scanning
+		case nul:
+			return nil
+		default:
+			msg := fmt.Sprintf("invalid character '%c' after top-level value", c)
+			return errors.ErrSyntax(msg, cursor+1)
+		}
+	}
+}
+
+// skipWhiteSpace returns the index of the first byte at or after cursor that is not flagged in isWhiteSpace.
+// There is no bounds check, so buf must hold a non-whitespace byte (such as the nul sentinel) from cursor on.
+func skipWhiteSpace(buf []byte, cursor int64) int64 {
+	for isWhiteSpace[buf[cursor]] {
+		cursor++
+	}
+	return cursor
+}
+
+// compactValue skips leading whitespace and dispatches on the first
+// significant byte to the matching compact* routine, returning whatever that
+// routine returns. A stray '}' or ']' and any byte that cannot start a JSON
+// value are reported as syntax errors at the offending offset.
+func compactValue(dst, src []byte, cursor int64, escape bool) ([]byte, int64, error) {
+	cursor = skipWhiteSpace(src, cursor)
+	switch c := src[cursor]; c {
+	case '{':
+		return compactObject(dst, src, cursor, escape)
+	case '[':
+		return compactArray(dst, src, cursor, escape)
+	case '"':
+		return compactString(dst, src, cursor, escape)
+	case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
+		return compactNumber(dst, src, cursor)
+	case 't':
+		return compactTrue(dst, src, cursor)
+	case 'f':
+		return compactFalse(dst, src, cursor)
+	case 'n':
+		return compactNull(dst, src, cursor)
+	case '}':
+		return nil, 0, errors.ErrSyntax("unexpected character '}'", cursor)
+	case ']':
+		return nil, 0, errors.ErrSyntax("unexpected character ']'", cursor)
+	default:
+		return nil, 0, errors.ErrSyntax(fmt.Sprintf("unexpected character '%c'", c), cursor)
+	}
+}
+
+// compactObject copies the object that starts at src[cursor] ('{') into dst
+// without the whitespace around its tokens and returns the cursor just past
+// the closing '}'. Keys go through compactString and values through
+// compactValue, so escape is forwarded to both. A missing '{', ':' or ','
+// yields an ErrExpected error; errors from keys or values are passed
+// through unchanged.
+func compactObject(dst, src []byte, cursor int64, escape bool) ([]byte, int64, error) {
+	if src[cursor] != '{' {
+		return nil, 0, errors.ErrExpected("expected { character for object value", cursor)
+	}
+	dst = append(dst, '{')
+	cursor = skipWhiteSpace(src, cursor+1)
+	if src[cursor] == '}' {
+		return append(dst, '}'), cursor + 1, nil
+	}
+	var err error
+	for {
+		cursor = skipWhiteSpace(src, cursor)
+		if dst, cursor, err = compactString(dst, src, cursor, escape); err != nil {
+			return nil, 0, err
+		}
+		cursor = skipWhiteSpace(src, cursor)
+		if src[cursor] != ':' {
+			return nil, 0, errors.ErrExpected("colon after object key", cursor)
+		}
+		dst = append(dst, ':')
+		if dst, cursor, err = compactValue(dst, src, cursor+1, escape); err != nil {
+			return nil, 0, err
+		}
+		cursor = skipWhiteSpace(src, cursor)
+		switch src[cursor] {
+		case '}':
+			return append(dst, '}'), cursor + 1, nil
+		case ',':
+			dst = append(dst, ',')
+			cursor++
+		default:
+			return nil, 0, errors.ErrExpected("comma after object value", cursor)
+		}
+	}
+}
+
+// compactArray copies the array that starts at src[cursor] ('[') into dst
+// without the whitespace around its tokens and returns the cursor just past
+// the closing ']'. Elements are handled by compactValue. A missing '[' or
+// ',' yields an ErrExpected error; element errors are passed through
+// unchanged.
+func compactArray(dst, src []byte, cursor int64, escape bool) ([]byte, int64, error) {
+	if src[cursor] != '[' {
+		return nil, 0, errors.ErrExpected("expected [ character for array value", cursor)
+	}
+	dst = append(dst, '[')
+	cursor = skipWhiteSpace(src, cursor+1)
+	if src[cursor] == ']' {
+		return append(dst, ']'), cursor + 1, nil
+	}
+	var err error
+	for {
+		if dst, cursor, err = compactValue(dst, src, cursor, escape); err != nil {
+			return nil, 0, err
+		}
+		cursor = skipWhiteSpace(src, cursor)
+		switch src[cursor] {
+		case ']':
+			return append(dst, ']'), cursor + 1, nil
+		case ',':
+			dst = append(dst, ',')
+			cursor++
+		default:
+			return nil, 0, errors.ErrExpected("comma after array value", cursor)
+		}
+	}
+}
+
+func compactString(dst, src []byte, cursor int64, escape bool) ([]byte, int64, error) { // copies one quoted string to dst; with escape set, rewrites '<', '>', '&', U+2028 and U+2029 as \u escapes
+	if src[cursor] != '"' {
+		return nil, 0, errors.ErrInvalidCharacter(src[cursor], "string", cursor)
+	}
+	start := cursor // start of the pending run of source bytes not yet copied to dst (opening quote included)
+	for {
+		cursor++
+		c := src[cursor]
+		if escape {
+			if isHTMLEscapeChar[c] { // flush the pending run, then emit \u00 followed by two digits looked up in hex
+				dst = append(dst, src[start:cursor]...)
+				dst = append(dst, `\u00`...)
+				dst = append(dst, hex[c>>4], hex[c&0xF])
+				start = cursor + 1
+			} else if c == 0xE2 && cursor+2 < int64(len(src)) && src[cursor+1] == 0x80 && src[cursor+2]&^1 == 0xA8 { // UTF-8 bytes E2 80 A8 / E2 80 A9, i.e. U+2028 / U+2029
+				dst = append(dst, src[start:cursor]...)
+				dst = append(dst, `\u202`...)
+				dst = append(dst, hex[src[cursor+2]&0xF]) // last digit comes from the low nibble of the third byte
+				start = cursor + 3                        // resume the pending run after the three-byte sequence
+				cursor += 2
+			}
+		}
+		switch c {
+		case '\\':
+			cursor++ // step over the escaped byte so that an escaped quote does not end the string
+			if src[cursor] == nul {
+				return nil, 0, errors.ErrUnexpectedEndOfJSON("string", int64(len(src)))
+			}
+		case '"':
+			cursor++ // closing quote: flush the rest of the pending run, quote included
+			return append(dst, src[start:cursor]...), cursor, nil
+		case nul: // sentinel reached before a closing quote
+			return nil, 0, errors.ErrUnexpectedEndOfJSON("string", int64(len(src)))
+		}
+	}
+}
+
+// compactNumber copies the number starting at src[cursor] to dst. The first
+// byte is taken as is (the caller dispatched on it); following bytes are
+// consumed for as long as floatTable flags them. The candidate is validated
+// with strconv.ParseFloat and that error, if any, is returned unchanged.
+// On success the cursor just past the number is returned.
+func compactNumber(dst, src []byte, cursor int64) ([]byte, int64, error) {
+	end := cursor + 1
+	for floatTable[src[end]] {
+		end++
+	}
+	num := src[cursor:end]
+	if _, err := strconv.ParseFloat(*(*string)(unsafe.Pointer(&num)), 64); err != nil {
+		return nil, 0, err
+	}
+	return append(dst, num...), end, nil
+}
+
+// compactTrue appends the literal "true" found at cursor to dst. It fails
+// when fewer than four bytes remain or when those bytes spell something else.
+func compactTrue(dst, src []byte, cursor int64) ([]byte, int64, error) {
+	if cursor+4 > int64(len(src)) {
+		return nil, 0, errors.ErrUnexpectedEndOfJSON("true", cursor)
+	}
+	if string(src[cursor:cursor+4]) != "true" {
+		return nil, 0, errors.ErrInvalidCharacter(src[cursor], "true", cursor)
+	}
+	return append(dst, "true"...), cursor + 4, nil
+}
+
+// compactFalse appends the literal "false" found at cursor to dst. It fails
+// when fewer than five bytes remain or when those bytes spell something else.
+func compactFalse(dst, src []byte, cursor int64) ([]byte, int64, error) {
+	if cursor+5 > int64(len(src)) {
+		return nil, 0, errors.ErrUnexpectedEndOfJSON("false", cursor)
+	}
+	if string(src[cursor:cursor+5]) != "false" {
+		return nil, 0, errors.ErrInvalidCharacter(src[cursor], "false", cursor)
+	}
+	return append(dst, "false"...), cursor + 5, nil
+}
+
+// compactNull appends the literal "null" found at cursor to dst. It fails
+// when fewer than four bytes remain or when those bytes spell something else.
+func compactNull(dst, src []byte, cursor int64) ([]byte, int64, error) {
+	if cursor+4 > int64(len(src)) {
+		return nil, 0, errors.ErrUnexpectedEndOfJSON("null", cursor)
+	}
+	if string(src[cursor:cursor+4]) != "null" {
+		return nil, 0, errors.ErrInvalidCharacter(src[cursor], "null", cursor)
+	}
+	return append(dst, "null"...), cursor + 4, nil
+}
diff --git a/vendor/github.com/goccy/go-json/internal/encoder/compiler.go b/vendor/github.com/goccy/go-json/internal/encoder/compiler.go
new file mode 100644
index 00000000..37b7aa38
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/internal/encoder/compiler.go
@@ -0,0 +1,935 @@
+package encoder
+
+import (
+	"context"
+	"encoding"
+	"encoding/json"
+	"reflect"
+	"sync/atomic"
+	"unsafe"
+
+	"github.com/goccy/go-json/internal/errors"
+	"github.com/goccy/go-json/internal/runtime"
+)
+
+type marshalerContext interface { // MarshalJSON variant that receives a context.Context; matched by reflection through marshalJSONContextType
+	MarshalJSON(context.Context) ([]byte, error)
+}
+
+var (
+	marshalJSONType        = reflect.TypeOf((*json.Marshaler)(nil)).Elem()
+	marshalJSONContextType = reflect.TypeOf((*marshalerContext)(nil)).Elem()
+	marshalTextType        = reflect.TypeOf((*encoding.TextMarshaler)(nil)).Elem()
+	jsonNumberType         = reflect.TypeOf(json.Number(""))
+	cachedOpcodeSets       []*OpcodeSet   // allocated in init; CompileToGetCodeSet indexes it by (typeptr-BaseTypeAddr)>>AddrShift
+	cachedOpcodeMap        unsafe.Pointer // map[uintptr]*OpcodeSet; read and replaced atomically by loadOpcodeMap / storeOpcodeSet
+	typeAddr               *runtime.TypeAddr // set in init; never nil afterwards
+)
+
+func init() { // resolves typeAddr (falling back to a zero value) and sizes cachedOpcodeSets from its address range
+	if typeAddr = runtime.AnalyzeTypeAddr(); typeAddr == nil {
+		typeAddr = &runtime.TypeAddr{}
+	}
+	slots := typeAddr.AddrRange>>typeAddr.AddrShift + 1
+	cachedOpcodeSets = make([]*OpcodeSet, slots)
+}
+
+func loadOpcodeMap() map[uintptr]*OpcodeSet { // returns the currently published map; nil until storeOpcodeSet has run once
+	p := atomic.LoadPointer(&cachedOpcodeMap)
+	return *(*map[uintptr]*OpcodeSet)(unsafe.Pointer(&p)) // reinterpret the raw pointer as the map value it was stored from
+}
+
+// storeOpcodeSet publishes a new map holding typ→set plus every entry of m (entries of m win on a key
+// clash). m itself is never written to; the fresh map replaces cachedOpcodeMap with an atomic store.
+func storeOpcodeSet(typ uintptr, set *OpcodeSet, m map[uintptr]*OpcodeSet) {
+	next := make(map[uintptr]*OpcodeSet, len(m)+1)
+	next[typ] = set
+	for key, cached := range m {
+		next[key] = cached
+	}
+	atomic.StorePointer(&cachedOpcodeMap, *(*unsafe.Pointer)(unsafe.Pointer(&next)))
+}
+
+func compileToGetCodeSetSlowPath(typeptr uintptr) (*OpcodeSet, error) { // map-backed lookup: compile on a miss and republish the map
+	snapshot := loadOpcodeMap()
+	if cached, ok := snapshot[typeptr]; ok {
+		return cached, nil
+	}
+	compiled, err := newCompiler().compile(typeptr)
+	if err != nil {
+		return nil, err
+	}
+	storeOpcodeSet(typeptr, compiled, snapshot)
+	return compiled, nil
+}
+
+// getFilteredCodeSetIfNeeded returns codeSet itself unless ContextOption is set and the context carries a FieldQuery; then a per-query filtered set is looked up or built and cached.
+func getFilteredCodeSetIfNeeded(ctx *RuntimeContext, codeSet *OpcodeSet) (*OpcodeSet, error) {
+	if ctx.Option.Flag&ContextOption == 0 {
+		return codeSet, nil
+	}
+	query := FieldQueryFromContext(ctx.Option.Context)
+	if query == nil {
+		return codeSet, nil
+	}
+	ctx.Option.Flag |= FieldQueryOption
+	if hit := codeSet.getQueryCache(query.Hash()); hit != nil {
+		return hit, nil
+	}
+	filtered, err := newCompiler().codeToOpcodeSet(codeSet.Type, codeSet.Code.Filter(query))
+	if err != nil {
+		return nil, err
+	}
+	codeSet.setQueryCache(query.Hash(), filtered)
+	return filtered, nil
+}
+
+type Compiler struct { // state for one compilation run; instances are created through newCompiler
+	structTypeToCode map[uintptr]*StructCode // struct types whose compilation is in progress, keyed by type address; structCode uses it to detect recursion
+}
+
+// newCompiler returns a Compiler whose in-progress struct table is
+// allocated and empty.
+func newCompiler() *Compiler {
+	return &Compiler{structTypeToCode: make(map[uintptr]*StructCode)}
+}
+
+// compile reinterprets typeptr as a *runtime.Type, builds its Code tree and lowers that tree to an OpcodeSet.
+func (c *Compiler) compile(typeptr uintptr) (*OpcodeSet, error) {
+	typ := *(**runtime.Type)(unsafe.Pointer(&typeptr)) // noescape trick for header.typ ( reflect.*rtype )
+	code, err := c.typeToCode(typ)
+	if err != nil {
+		return nil, err
+	}
+	return c.codeToOpcodeSet(typ, code)
+}
+
+func (c *Compiler) codeToOpcodeSet(typ *runtime.Type, code Code) (*OpcodeSet, error) { // lowers code twice (escapeKey unset, then set) and packages the opcode chains into an OpcodeSet
+	noescapeKeyCode := c.codeToOpcode(&compileContext{
+		structTypeToCodes: map[uintptr]Opcodes{},
+		recursiveCodes:    &Opcodes{},
+	}, typ, code)
+	if err := noescapeKeyCode.Validate(); err != nil { // only the first chain is validated before the second one is built
+		return nil, err
+	}
+	escapeKeyCode := c.codeToOpcode(&compileContext{
+		structTypeToCodes: map[uintptr]Opcodes{},
+		recursiveCodes:    &Opcodes{},
+		escapeKey:         true,
+	}, typ, code)
+	noescapeKeyCode = copyOpcode(noescapeKeyCode) // both chains are replaced by copies before any further processing
+	escapeKeyCode = copyOpcode(escapeKeyCode)
+	setTotalLengthToInterfaceOp(noescapeKeyCode)
+	setTotalLengthToInterfaceOp(escapeKeyCode)
+	interfaceNoescapeKeyCode := copyToInterfaceOpcode(noescapeKeyCode) // interface variants are derived from the copied chains
+	interfaceEscapeKeyCode := copyToInterfaceOpcode(escapeKeyCode)
+	codeLength := noescapeKeyCode.TotalLength() // CodeLength is taken from the non-escaped chain
+	return &OpcodeSet{
+		Type:                     typ,
+		NoescapeKeyCode:          noescapeKeyCode,
+		EscapeKeyCode:            escapeKeyCode,
+		InterfaceNoescapeKeyCode: interfaceNoescapeKeyCode,
+		InterfaceEscapeKeyCode:   interfaceEscapeKeyCode,
+		CodeLength:               codeLength,
+		EndCode:                  ToEndCode(interfaceNoescapeKeyCode),
+		Code:                     code, // kept so getFilteredCodeSetIfNeeded can call Filter on it later
+		QueryCache:               map[string]*OpcodeSet{},
+	}, nil
+}
+
+// typeToCode builds the Code tree for the type handed to compile; one leading pointer level is stripped and remembered in isPtr.
+func (c *Compiler) typeToCode(typ *runtime.Type) (Code, error) {
+	switch { // typ as given is a marshaler
+	case c.implementsMarshalJSON(typ):
+		return c.marshalJSONCode(typ)
+	case c.implementsMarshalText(typ):
+		return c.marshalTextCode(typ)
+	}
+	isPtr := false
+	orgType := typ
+	if typ.Kind() == reflect.Ptr { // look through one pointer level; orgType keeps the type as given
+		typ = typ.Elem()
+		isPtr = true
+	}
+	switch { // the (possibly dereferenced) type is a marshaler: compile against the original type
+	case c.implementsMarshalJSON(typ):
+		return c.marshalJSONCode(orgType)
+	case c.implementsMarshalText(typ):
+		return c.marshalTextCode(orgType)
+	}
+	switch typ.Kind() {
+	case reflect.Slice:
+		elem := typ.Elem()
+		if elem.Kind() == reflect.Uint8 { // byte-like elements use BytesCode unless the pointer-to-element type is a marshaler
+			p := runtime.PtrTo(elem)
+			if !c.implementsMarshalJSONType(p) && !p.Implements(marshalTextType) {
+				return c.bytesCode(typ, isPtr)
+			}
+		}
+		return c.sliceCode(typ)
+	case reflect.Map:
+		if isPtr { // pointer to map: rebuild the pointer type and compile it through ptrCode
+			return c.ptrCode(runtime.PtrTo(typ))
+		}
+		return c.mapCode(typ)
+	case reflect.Struct:
+		return c.structCode(typ, isPtr)
+	case reflect.Int:
+		return c.intCode(typ, isPtr)
+	case reflect.Int8:
+		return c.int8Code(typ, isPtr)
+	case reflect.Int16:
+		return c.int16Code(typ, isPtr)
+	case reflect.Int32:
+		return c.int32Code(typ, isPtr)
+	case reflect.Int64:
+		return c.int64Code(typ, isPtr)
+	case reflect.Uint, reflect.Uintptr:
+		return c.uintCode(typ, isPtr)
+	case reflect.Uint8:
+		return c.uint8Code(typ, isPtr)
+	case reflect.Uint16:
+		return c.uint16Code(typ, isPtr)
+	case reflect.Uint32:
+		return c.uint32Code(typ, isPtr)
+	case reflect.Uint64:
+		return c.uint64Code(typ, isPtr)
+	case reflect.Float32:
+		return c.float32Code(typ, isPtr)
+	case reflect.Float64:
+		return c.float64Code(typ, isPtr)
+	case reflect.String:
+		return c.stringCode(typ, isPtr)
+	case reflect.Bool:
+		return c.boolCode(typ, isPtr)
+	case reflect.Interface:
+		return c.interfaceCode(typ, isPtr)
+	default: // every kind not listed above is delegated to typeToCodeWithPtr
+		if isPtr && typ.Implements(marshalTextType) {
+			typ = orgType
+		}
+		return c.typeToCodeWithPtr(typ, isPtr)
+	}
+}
+
+// typeToCodeWithPtr builds the Code for typ without stripping a pointer level first. isPtr is forwarded
+// to structCode only; every other kind is compiled with isPtr=false. Unhandled kinds yield UnsupportedTypeError.
+func (c *Compiler) typeToCodeWithPtr(typ *runtime.Type, isPtr bool) (Code, error) {
+	if c.implementsMarshalJSON(typ) {
+		return c.marshalJSONCode(typ)
+	}
+	if c.implementsMarshalText(typ) {
+		return c.marshalTextCode(typ)
+	}
+	switch typ.Kind() {
+	case reflect.Struct:
+		return c.structCode(typ, isPtr)
+	case reflect.Ptr:
+		return c.ptrCode(typ)
+	case reflect.Slice:
+		elem := typ.Elem()
+		if elem.Kind() == reflect.Uint8 {
+			p := runtime.PtrTo(elem)
+			if !c.implementsMarshalJSONType(p) && !p.Implements(marshalTextType) {
+				return c.bytesCode(typ, false)
+			}
+		}
+		return c.sliceCode(typ)
+	case reflect.Array:
+		return c.arrayCode(typ)
+	case reflect.Map:
+		return c.mapCode(typ)
+	case reflect.Interface:
+		return c.interfaceCode(typ, false)
+	case reflect.String:
+		return c.stringCode(typ, false)
+	case reflect.Bool:
+		return c.boolCode(typ, false)
+	case reflect.Int:
+		return c.intCode(typ, false)
+	case reflect.Int8:
+		return c.int8Code(typ, false)
+	case reflect.Int16:
+		return c.int16Code(typ, false)
+	case reflect.Int32:
+		return c.int32Code(typ, false)
+	case reflect.Int64:
+		return c.int64Code(typ, false)
+	case reflect.Uint, reflect.Uintptr:
+		return c.uintCode(typ, false)
+	case reflect.Uint8:
+		return c.uint8Code(typ, false)
+	case reflect.Uint16:
+		return c.uint16Code(typ, false)
+	case reflect.Uint32:
+		return c.uint32Code(typ, false)
+	case reflect.Uint64:
+		return c.uint64Code(typ, false)
+	case reflect.Float32:
+		return c.float32Code(typ, false)
+	case reflect.Float64:
+		return c.float64Code(typ, false)
+	}
+	return nil, &errors.UnsupportedTypeError{Type: runtime.RType2Type(typ)}
+}
+
+const intSize = 32 << (^uint(0) >> 63) // 32 when uint is 32 bits wide, 64 when it is 64 bits wide
+
+//nolint:unparam
+func (c *Compiler) intCode(typ *runtime.Type, isPtr bool) (*IntCode, error) { // IntCode with bitSize = intSize; the error is always nil
+	return &IntCode{typ: typ, bitSize: intSize, isPtr: isPtr}, nil
+}
+
+//nolint:unparam
+func (c *Compiler) int8Code(typ *runtime.Type, isPtr bool) (*IntCode, error) { // IntCode with bitSize 8; the error is always nil
+	return &IntCode{typ: typ, bitSize: 8, isPtr: isPtr}, nil
+}
+
+//nolint:unparam
+func (c *Compiler) int16Code(typ *runtime.Type, isPtr bool) (*IntCode, error) { // IntCode with bitSize 16; the error is always nil
+	return &IntCode{typ: typ, bitSize: 16, isPtr: isPtr}, nil
+}
+
+//nolint:unparam
+func (c *Compiler) int32Code(typ *runtime.Type, isPtr bool) (*IntCode, error) { // IntCode with bitSize 32; the error is always nil
+	return &IntCode{typ: typ, bitSize: 32, isPtr: isPtr}, nil
+}
+
+//nolint:unparam
+func (c *Compiler) int64Code(typ *runtime.Type, isPtr bool) (*IntCode, error) { // IntCode with bitSize 64; the error is always nil
+	return &IntCode{typ: typ, bitSize: 64, isPtr: isPtr}, nil
+}
+
+//nolint:unparam
+func (c *Compiler) uintCode(typ *runtime.Type, isPtr bool) (*UintCode, error) { // UintCode with bitSize = intSize (also used for uintptr); the error is always nil
+	return &UintCode{typ: typ, bitSize: intSize, isPtr: isPtr}, nil
+}
+
+//nolint:unparam
+func (c *Compiler) uint8Code(typ *runtime.Type, isPtr bool) (*UintCode, error) { // UintCode with bitSize 8; the error is always nil
+	return &UintCode{typ: typ, bitSize: 8, isPtr: isPtr}, nil
+}
+
+//nolint:unparam
+func (c *Compiler) uint16Code(typ *runtime.Type, isPtr bool) (*UintCode, error) { // UintCode with bitSize 16; the error is always nil
+	return &UintCode{typ: typ, bitSize: 16, isPtr: isPtr}, nil
+}
+
+//nolint:unparam
+func (c *Compiler) uint32Code(typ *runtime.Type, isPtr bool) (*UintCode, error) { // UintCode with bitSize 32; the error is always nil
+	return &UintCode{typ: typ, bitSize: 32, isPtr: isPtr}, nil
+}
+
+//nolint:unparam
+func (c *Compiler) uint64Code(typ *runtime.Type, isPtr bool) (*UintCode, error) { // UintCode with bitSize 64; the error is always nil
+	return &UintCode{typ: typ, bitSize: 64, isPtr: isPtr}, nil
+}
+
+//nolint:unparam
+func (c *Compiler) float32Code(typ *runtime.Type, isPtr bool) (*FloatCode, error) { // FloatCode with bitSize 32; the error is always nil
+	return &FloatCode{typ: typ, bitSize: 32, isPtr: isPtr}, nil
+}
+
+//nolint:unparam
+func (c *Compiler) float64Code(typ *runtime.Type, isPtr bool) (*FloatCode, error) { // FloatCode with bitSize 64; the error is always nil
+	return &FloatCode{typ: typ, bitSize: 64, isPtr: isPtr}, nil
+}
+
+//nolint:unparam
+func (c *Compiler) stringCode(typ *runtime.Type, isPtr bool) (*StringCode, error) { // StringCode carrying typ and isPtr; the error is always nil
+	return &StringCode{typ: typ, isPtr: isPtr}, nil
+}
+
+//nolint:unparam
+func (c *Compiler) boolCode(typ *runtime.Type, isPtr bool) (*BoolCode, error) { // BoolCode carrying typ and isPtr; the error is always nil
+	return &BoolCode{typ: typ, isPtr: isPtr}, nil
+}
+
+//nolint:unparam
+func (c *Compiler) intStringCode(typ *runtime.Type) (*IntCode, error) { // IntCode with isString set and bitSize = intSize; called from mapKeyCode
+	return &IntCode{typ: typ, bitSize: intSize, isString: true}, nil
+}
+
+//nolint:unparam
+func (c *Compiler) int8StringCode(typ *runtime.Type) (*IntCode, error) { // IntCode with isString set and bitSize 8; called from mapKeyCode
+	return &IntCode{typ: typ, bitSize: 8, isString: true}, nil
+}
+
+//nolint:unparam
+func (c *Compiler) int16StringCode(typ *runtime.Type) (*IntCode, error) { // IntCode with isString set and bitSize 16; called from mapKeyCode
+	return &IntCode{typ: typ, bitSize: 16, isString: true}, nil
+}
+
+//nolint:unparam
+func (c *Compiler) int32StringCode(typ *runtime.Type) (*IntCode, error) { // IntCode with isString set and bitSize 32; called from mapKeyCode
+	return &IntCode{typ: typ, bitSize: 32, isString: true}, nil
+}
+
+//nolint:unparam
+func (c *Compiler) int64StringCode(typ *runtime.Type) (*IntCode, error) { // IntCode with isString set and bitSize 64; called from mapKeyCode
+	return &IntCode{typ: typ, bitSize: 64, isString: true}, nil
+}
+
+//nolint:unparam
+func (c *Compiler) uintStringCode(typ *runtime.Type) (*UintCode, error) { // UintCode with isString set and bitSize = intSize; called from mapKeyCode (uint and uintptr keys)
+	return &UintCode{typ: typ, bitSize: intSize, isString: true}, nil
+}
+
+//nolint:unparam
+func (c *Compiler) uint8StringCode(typ *runtime.Type) (*UintCode, error) { // UintCode with isString set and bitSize 8; called from mapKeyCode
+	return &UintCode{typ: typ, bitSize: 8, isString: true}, nil
+}
+
+//nolint:unparam
+func (c *Compiler) uint16StringCode(typ *runtime.Type) (*UintCode, error) { // UintCode with isString set and bitSize 16; called from mapKeyCode
+	return &UintCode{typ: typ, bitSize: 16, isString: true}, nil
+}
+
+//nolint:unparam
+func (c *Compiler) uint32StringCode(typ *runtime.Type) (*UintCode, error) { // UintCode with isString set and bitSize 32; called from mapKeyCode
+	return &UintCode{typ: typ, bitSize: 32, isString: true}, nil
+}
+
+//nolint:unparam
+func (c *Compiler) uint64StringCode(typ *runtime.Type) (*UintCode, error) { // UintCode with isString set and bitSize 64; called from mapKeyCode
+	return &UintCode{typ: typ, bitSize: 64, isString: true}, nil
+}
+
+//nolint:unparam
+func (c *Compiler) bytesCode(typ *runtime.Type, isPtr bool) (*BytesCode, error) { // BytesCode carrying typ and isPtr; the error is always nil
+	return &BytesCode{typ: typ, isPtr: isPtr}, nil
+}
+
+//nolint:unparam
+func (c *Compiler) interfaceCode(typ *runtime.Type, isPtr bool) (*InterfaceCode, error) { // InterfaceCode carrying typ and isPtr; the error is always nil
+	return &InterfaceCode{typ: typ, isPtr: isPtr}, nil
+}
+
+//nolint:unparam
+func (c *Compiler) marshalJSONCode(typ *runtime.Type) (*MarshalJSONCode, error) {
+	code := &MarshalJSONCode{typ: typ}
+	code.isAddrForMarshaler = c.isPtrMarshalJSONType(typ)
+	code.isNilableType = c.isNilableType(typ)
+	// The context-taking MarshalJSON may be implemented by typ or by pointer-to-typ.
+	code.isMarshalerContext = typ.Implements(marshalJSONContextType) || runtime.PtrTo(typ).Implements(marshalJSONContextType)
+	return code, nil
+}
+
+//nolint:unparam
+func (c *Compiler) marshalTextCode(typ *runtime.Type) (*MarshalTextCode, error) {
+	code := &MarshalTextCode{typ: typ}
+	// isAddrForMarshaler: only pointer-to-typ implements encoding.TextMarshaler.
+	code.isAddrForMarshaler = c.isPtrMarshalTextType(typ)
+	code.isNilableType = c.isNilableType(typ)
+	return code, nil
+}
+
+// ptrCode compiles typ's element; when that is itself a PtrCode the two collapse into one with ptrNum incremented.
+func (c *Compiler) ptrCode(typ *runtime.Type) (*PtrCode, error) {
+	elem, err := c.typeToCodeWithPtr(typ.Elem(), true)
+	if err != nil {
+		return nil, err
+	}
+	if inner, ok := elem.(*PtrCode); ok {
+		return &PtrCode{typ: typ, value: inner.value, ptrNum: inner.ptrNum + 1}, nil
+	}
+	return &PtrCode{typ: typ, value: elem, ptrNum: 1}, nil
+}
+
+// sliceCode compiles the element type through listElemCode and wraps it in
+// a SliceCode. Struct elements get enableIndirect called on them first.
+func (c *Compiler) sliceCode(typ *runtime.Type) (*SliceCode, error) {
+	elemCode, err := c.listElemCode(typ.Elem())
+	if err != nil {
+		return nil, err
+	}
+	if elemCode.Kind() == CodeKindStruct {
+		elemCode.(*StructCode).enableIndirect()
+	}
+	return &SliceCode{typ: typ, value: elemCode}, nil
+}
+
+// arrayCode compiles the element type through listElemCode and wraps it in
+// an ArrayCode. Struct elements get enableIndirect called on them first.
+func (c *Compiler) arrayCode(typ *runtime.Type) (*ArrayCode, error) {
+	elemCode, err := c.listElemCode(typ.Elem())
+	if err != nil {
+		return nil, err
+	}
+	if elemCode.Kind() == CodeKindStruct {
+		elemCode.(*StructCode).enableIndirect()
+	}
+	return &ArrayCode{typ: typ, value: elemCode}, nil
+}
+
+// mapCode compiles key and value types (key first) into a MapCode; struct values get enableIndirect called on them.
+func (c *Compiler) mapCode(typ *runtime.Type) (*MapCode, error) {
+	keyCode, err := c.mapKeyCode(typ.Key())
+	if err != nil {
+		return nil, err
+	}
+	valueCode, err := c.mapValueCode(typ.Elem())
+	if err != nil {
+		return nil, err
+	}
+	if valueCode.Kind() == CodeKindStruct {
+		valueCode.(*StructCode).enableIndirect()
+	}
+	return &MapCode{typ: typ, key: keyCode, value: valueCode}, nil
+}
+
+// listElemCode builds the Code for a slice or array element of type typ.
+// Marshalers (on typ or on pointer-to-typ for JSON, on pointer-to-typ only
+// for text) and maps are special-cased; everything else goes through
+// typeToCodeWithPtr, with ptrNum bumped when the result is a pointer to a map.
+func (c *Compiler) listElemCode(typ *runtime.Type) (Code, error) {
+	switch {
+	case c.implementsMarshalJSONType(typ) || c.implementsMarshalJSONType(runtime.PtrTo(typ)):
+		return c.marshalJSONCode(typ)
+	case c.isPtrMarshalTextType(typ):
+		return c.marshalTextCode(typ)
+	case typ.Kind() == reflect.Map:
+		return c.ptrCode(runtime.PtrTo(typ))
+	}
+	// isPtr was originally used to indicate whether the type of top level is pointer.
+	// However, since the slice/array element is a specification that can get the pointer address, explicitly set isPtr to true.
+	// See here for related issues: https://github.com/goccy/go-json/issues/370
+	code, err := c.typeToCodeWithPtr(typ, true)
+	if err != nil {
+		return nil, err
+	}
+	if ptr, ok := code.(*PtrCode); ok && ptr.value.Kind() == CodeKindMap {
+		ptr.ptrNum++
+	}
+	return code, nil
+}
+
+// mapKeyCode builds the Code for a map key of type typ. Text marshalers come
+// first; otherwise only pointers, strings and integer kinds are accepted, the
+// integer kinds through the *StringCode constructors. Anything else is an UnsupportedTypeError.
+func (c *Compiler) mapKeyCode(typ *runtime.Type) (Code, error) {
+	if c.implementsMarshalText(typ) {
+		return c.marshalTextCode(typ)
+	}
+	switch typ.Kind() {
+	case reflect.Ptr:
+		return c.ptrCode(typ)
+	case reflect.String:
+		return c.stringCode(typ, false)
+	case reflect.Int:
+		return c.intStringCode(typ)
+	case reflect.Int8:
+		return c.int8StringCode(typ)
+	case reflect.Int16:
+		return c.int16StringCode(typ)
+	case reflect.Int32:
+		return c.int32StringCode(typ)
+	case reflect.Int64:
+		return c.int64StringCode(typ)
+	case reflect.Uint, reflect.Uintptr:
+		return c.uintStringCode(typ)
+	case reflect.Uint8:
+		return c.uint8StringCode(typ)
+	case reflect.Uint16:
+		return c.uint16StringCode(typ)
+	case reflect.Uint32:
+		return c.uint32StringCode(typ)
+	case reflect.Uint64:
+		return c.uint64StringCode(typ)
+	}
+	return nil, &errors.UnsupportedTypeError{Type: runtime.RType2Type(typ)}
+}
+
+// mapValueCode builds the Code for a map value of type typ. A value that is
+// itself a map is compiled as a pointer to that map via ptrCode. Every other
+// type goes through typeToCodeWithPtr with isPtr=false; if that yields a
+// PtrCode wrapping a map, its ptrNum is incremented before it is
+// returned.
+func (c *Compiler) mapValueCode(typ *runtime.Type) (Code, error) {
+	if typ.Kind() == reflect.Map {
+		return c.ptrCode(runtime.PtrTo(typ))
+	}
+	code, err := c.typeToCodeWithPtr(typ, false)
+	if err != nil {
+		return nil, err
+	}
+	if ptr, ok := code.(*PtrCode); ok && ptr.value.Kind() == CodeKindMap {
+		ptr.ptrNum++
+	}
+	return code, nil
+}
+
+// structCode builds the StructCode for typ, compiling every non-ignored field and dropping fields whose keys clash.
+func (c *Compiler) structCode(typ *runtime.Type, isPtr bool) (*StructCode, error) {
+	typeptr := uintptr(unsafe.Pointer(typ))
+	if code, exists := c.structTypeToCode[typeptr]; exists { // typ is already being compiled further up the call chain
+		derefCode := *code // shallow copy of the in-progress code, flagged as recursive
+		derefCode.isRecursive = true
+		return &derefCode, nil
+	}
+	indirect := runtime.IfaceIndir(typ)
+	code := &StructCode{typ: typ, isPtr: isPtr, isIndirect: indirect}
+	c.structTypeToCode[typeptr] = code // registered before the fields are visited so nested references find it
+	fieldNum := typ.NumField()
+	tags := c.typeToStructTags(typ)
+	fields := []*StructFieldCode{}
+	for i, tag := range tags {
+		isOnlyOneFirstField := i == 0 && fieldNum == 1
+		field, err := c.structFieldCode(code, tag, isPtr, isOnlyOneFirstField)
+		if err != nil {
+			return nil, err
+		}
+		if field.isAnonymous {
+			structCode := field.getAnonymousStruct()
+			if structCode != nil {
+				structCode.removeFieldsByTags(tags)
+				if c.isAssignableIndirect(field, isPtr) {
+					if indirect {
+						structCode.isIndirect = true
+					} else {
+						structCode.isIndirect = false
+					}
+				}
+			}
+		} else {
+			structCode := field.getStruct()
+			if structCode != nil {
+				if indirect {
+					// if parent is indirect type, set child indirect property to true
+					structCode.isIndirect = true
+				} else {
+					// if parent is not indirect type, set child indirect property to false.
+					// but if parent's indirect is false and isPtr is true, then indirect must be true.
+					// Do this only if indirectConversion is enabled at the end of structCode.
+					structCode.isIndirect = false
+				}
+			}
+		}
+		fields = append(fields, field)
+	}
+	fieldMap := c.getFieldMap(fields)
+	duplicatedFieldMap := c.getDuplicatedFieldMap(fieldMap)
+	code.fields = c.filteredDuplicatedFields(fields, duplicatedFieldMap)
+	if !code.disableIndirectConversion && !indirect && isPtr { // disableIndirectConversion may have been set by structFieldCode
+		code.enableIndirect()
+	}
+	delete(c.structTypeToCode, typeptr) // typ is no longer in progress
+	return code, nil
+}
+
+func toElemType(t *runtime.Type) *runtime.Type { // strips every leading pointer level and returns the first non-pointer type
+	if t.Kind() != reflect.Ptr {
+		return t
+	}
+	return toElemType(t.Elem())
+}
+
+func (c *Compiler) structFieldCode(structCode *StructCode, tag *runtime.StructTag, isPtr, isOnlyOneFirstField bool) (*StructFieldCode, error) { // builds the StructFieldCode for one tagged field; may also adjust flags on the parent structCode
+	field := tag.Field
+	fieldType := runtime.Type2RType(field.Type)
+	isIndirectSpecialCase := isPtr && isOnlyOneFirstField // pointer to a struct whose only field is this one
+	fieldCode := &StructFieldCode{
+		typ:           fieldType,
+		key:           tag.Key,
+		tag:           tag,
+		offset:        field.Offset,
+		isAnonymous:   field.Anonymous && !tag.IsTaggedKey && toElemType(fieldType).Kind() == reflect.Struct, // embedded, untagged, and a struct after stripping pointers
+		isTaggedKey:   tag.IsTaggedKey,
+		isNilableType: c.isNilableType(fieldType),
+		isNilCheck:    true,
+	}
+	switch {
+	case c.isMovePointerPositionFromHeadToFirstMarshalJSONFieldCase(fieldType, isIndirectSpecialCase):
+		code, err := c.marshalJSONCode(fieldType)
+		if err != nil {
+			return nil, err
+		}
+		fieldCode.value = code
+		fieldCode.isAddrForMarshaler = true
+		fieldCode.isNilCheck = false
+		structCode.isIndirect = false // parent flags are changed here; structCode reads disableIndirectConversion afterwards
+		structCode.disableIndirectConversion = true
+	case c.isMovePointerPositionFromHeadToFirstMarshalTextFieldCase(fieldType, isIndirectSpecialCase):
+		code, err := c.marshalTextCode(fieldType)
+		if err != nil {
+			return nil, err
+		}
+		fieldCode.value = code
+		fieldCode.isAddrForMarshaler = true
+		fieldCode.isNilCheck = false
+		structCode.isIndirect = false // same parent adjustment as in the MarshalJSON case
+		structCode.disableIndirectConversion = true
+	case isPtr && c.isPtrMarshalJSONType(fieldType):
+		// *struct{ field T }
+		// func (*T) MarshalJSON() ([]byte, error)
+		code, err := c.marshalJSONCode(fieldType)
+		if err != nil {
+			return nil, err
+		}
+		fieldCode.value = code
+		fieldCode.isAddrForMarshaler = true
+		fieldCode.isNilCheck = false
+	case isPtr && c.isPtrMarshalTextType(fieldType):
+		// *struct{ field T }
+		// func (*T) MarshalText() ([]byte, error)
+		code, err := c.marshalTextCode(fieldType)
+		if err != nil {
+			return nil, err
+		}
+		fieldCode.value = code
+		fieldCode.isAddrForMarshaler = true
+		fieldCode.isNilCheck = false
+	default: // ordinary field: compile by type and note whether the value is a pointer or interface code
+		code, err := c.typeToCodeWithPtr(fieldType, isPtr)
+		if err != nil {
+			return nil, err
+		}
+		switch code.Kind() {
+		case CodeKindPtr, CodeKindInterface:
+			fieldCode.isNextOpPtrType = true
+		}
+		fieldCode.value = code
+	}
+	return fieldCode, nil
+}
+
+// isAssignableIndirect reports whether structCode may overwrite the
+// isIndirect flag of the struct embedded through fieldCode: never when isPtr
+// is set, and never when the field's value is a MarshalJSON or MarshalText code.
+func (c *Compiler) isAssignableIndirect(fieldCode *StructFieldCode, isPtr bool) bool {
+	if isPtr {
+		return false
+	}
+	switch fieldCode.value.Kind() {
+	case CodeKindMarshalJSON, CodeKindMarshalText:
+		return false
+	}
+	return true
+}
+
+func (c *Compiler) getFieldMap(fields []*StructFieldCode) map[string][]*StructFieldCode { // groups fields by key, expanding anonymous fields via getAnonymousFieldMap
+	byKey := make(map[string][]*StructFieldCode)
+	for _, field := range fields {
+		if !field.isAnonymous {
+			byKey[field.key] = append(byKey[field.key], field)
+			continue
+		}
+		for key, nested := range c.getAnonymousFieldMap(field) {
+			byKey[key] = append(byKey[key], nested...)
+		}
+	}
+	return byKey
+}
+
+// getAnonymousFieldMap keys the fields reachable through field; without a usable (non-nil, non-recursive) anonymous struct, field is listed under its own key.
+func (c *Compiler) getAnonymousFieldMap(field *StructFieldCode) map[string][]*StructFieldCode {
+	embedded := field.getAnonymousStruct()
+	if embedded == nil || embedded.isRecursive {
+		return map[string][]*StructFieldCode{field.key: {field}}
+	}
+	byKey := map[string][]*StructFieldCode{}
+	for key, nested := range c.getFieldMapFromAnonymousParent(embedded.fields) {
+		byKey[key] = append(byKey[key], nested...)
+	}
+	return byKey
+}
+
+func (c *Compiler) getFieldMapFromAnonymousParent(fields []*StructFieldCode) map[string][]*StructFieldCode { // groups fields by key; fields reached through a further anonymous field lose isTaggedKey
+	byKey := make(map[string][]*StructFieldCode)
+	for _, field := range fields {
+		if !field.isAnonymous {
+			byKey[field.key] = append(byKey[field.key], field)
+			continue
+		}
+		for key, nested := range c.getAnonymousFieldMap(field) {
+			// Do not handle tagged key when embedding more than once
+			for _, inner := range nested {
+				inner.isTaggedKey = false
+			}
+			byKey[key] = append(byKey[key], nested...)
+		}
+	}
+	return byKey
+}
+
+// getDuplicatedFieldMap returns the set of fields that must be dropped
+// because their key is shared with another field. Keys that map to exactly
+// one field contribute nothing. For a shared key, every field is marked as
+// a duplicate, with one exception: when isTaggedKeyOnly reports true for
+// the group (exactly one field has isTaggedKey set), that tagged field is
+// kept and only the others are marked.
+func (c *Compiler) getDuplicatedFieldMap(fieldMap map[string][]*StructFieldCode) map[*StructFieldCode]struct{} {
+	dups := map[*StructFieldCode]struct{}{}
+	for _, fields := range fieldMap {
+		if len(fields) == 1 {
+			continue
+		}
+		keepTagged := c.isTaggedKeyOnly(fields)
+		for _, field := range fields {
+			if !(keepTagged && field.isTaggedKey) {
+				dups[field] = struct{}{}
+			}
+		}
+	}
+	return dups
+}
+
+// filteredDuplicatedFields returns fields without the entries listed in duplicatedFieldMap. Non-recursive anonymous
+// structs are filtered in place recursively and kept only if at least one of their fields survives.
+func (c *Compiler) filteredDuplicatedFields(fields []*StructFieldCode, duplicatedFieldMap map[*StructFieldCode]struct{}) []*StructFieldCode {
+	kept := make([]*StructFieldCode, 0, len(fields))
+	for _, field := range fields {
+		if field.isAnonymous {
+			if embedded := field.getAnonymousStruct(); embedded != nil && !embedded.isRecursive {
+				embedded.fields = c.filteredDuplicatedFields(embedded.fields, duplicatedFieldMap)
+				if len(embedded.fields) > 0 {
+					kept = append(kept, field)
+				}
+				continue
+			}
+		}
+		if _, dup := duplicatedFieldMap[field]; !dup {
+			kept = append(kept, field)
+		}
+	}
+	return kept
+}
+
+func (c *Compiler) isTaggedKeyOnly(fields []*StructFieldCode) bool { // true when exactly one of fields has isTaggedKey set
+	tagged := 0
+	for i := range fields {
+		if fields[i].isTaggedKey {
+			tagged++
+		}
+	}
+	return tagged == 1
+}
+
+// typeToStructTags walks the fields of typ in declaration order and returns
+// one StructTag per field, leaving out every field for which
+// runtime.IsIgnoredStructField reports true.
+func (c *Compiler) typeToStructTags(typ *runtime.Type) runtime.StructTags {
+	tags := runtime.StructTags{}
+	for i, n := 0, typ.NumField(); i < n; i++ {
+		if field := typ.Field(i); !runtime.IsIgnoredStructField(field) {
+			tags = append(tags, runtime.StructTagFromField(field))
+		}
+	}
+	return tags
+}
+
+// Treats *struct{ field T } like struct{ field *T }: true when the indirect special case applies,
+// T is not nilable, and only *T implements MarshalJSON (func (*T) MarshalJSON() ([]byte, error)).
+func (c *Compiler) isMovePointerPositionFromHeadToFirstMarshalJSONFieldCase(typ *runtime.Type, isIndirectSpecialCase bool) bool {
+	return isIndirectSpecialCase && !c.isNilableType(typ) && c.isPtrMarshalJSONType(typ)
+}
+
+// Treats *struct{ field T } like struct{ field *T }: true when the indirect special case applies,
+// T is not nilable, and only *T implements MarshalText (func (*T) MarshalText() ([]byte, error)).
+func (c *Compiler) isMovePointerPositionFromHeadToFirstMarshalTextFieldCase(typ *runtime.Type, isIndirectSpecialCase bool) bool {
+	return isIndirectSpecialCase && !c.isNilableType(typ) && c.isPtrMarshalTextType(typ)
+}
+
+// implementsMarshalJSON reports whether typ should be compiled as a JSON
+// marshaler as it stands. It is false when typ does not satisfy
+// implementsMarshalJSONType at all.
+func (c *Compiler) implementsMarshalJSON(typ *runtime.Type) bool {
+	if !c.implementsMarshalJSONType(typ) {
+		return false
+	}
+	// A non-pointer type qualifies directly. A pointer type qualifies only
+	// when its element does not satisfy the interface on its own; if the
+	// element does, the result is false because the pointer needs to be
+	// dereferenced first.
+	return typ.Kind() != reflect.Ptr || !c.implementsMarshalJSONType(typ.Elem())
+}
+
+
+// implementsMarshalText reports whether typ should be compiled as a text
+// marshaler as it stands. It is false when typ does not implement
+// encoding.TextMarshaler at all.
+func (c *Compiler) implementsMarshalText(typ *runtime.Type) bool {
+	if !typ.Implements(marshalTextType) {
+		return false
+	}
+	// A non-pointer type qualifies directly. A pointer type qualifies only
+	// when its element does not implement the interface on its own; if the
+	// element does, the result is false because the pointer needs to be
+	// dereferenced first.
+	return typ.Kind() != reflect.Ptr || !typ.Elem().Implements(marshalTextType)
+}
+
+
+// isNilableType reports whether typ is treated as nilable by the
+// compiler. That is the case whenever runtime.IfaceIndir(typ) is false,
+// and otherwise only for the pointer, map and func kinds; every other
+// kind yields false.
+func (c *Compiler) isNilableType(typ *runtime.Type) bool {
+	if !runtime.IfaceIndir(typ) {
+		return true
+	}
+	switch typ.Kind() {
+	case reflect.Ptr, reflect.Map, reflect.Func:
+		return true
+	}
+	return false
+}
+
+
+func (c *Compiler) implementsMarshalJSONType(typ *runtime.Type) bool { // typ implements json.Marshaler or the context-taking marshalerContext
+	return typ.Implements(marshalJSONType) || typ.Implements(marshalJSONContextType)
+}
+
+func (c *Compiler) isPtrMarshalJSONType(typ *runtime.Type) bool { // only pointer-to-typ, not typ itself, satisfies implementsMarshalJSONType
+	return !c.implementsMarshalJSONType(typ) && c.implementsMarshalJSONType(runtime.PtrTo(typ))
+}
+
+func (c *Compiler) isPtrMarshalTextType(typ *runtime.Type) bool { // only pointer-to-typ, not typ itself, implements encoding.TextMarshaler
+	return !typ.Implements(marshalTextType) && runtime.PtrTo(typ).Implements(marshalTextType)
+}
+
+func (c *Compiler) codeToOpcode(ctx *compileContext, typ *runtime.Type, code Code) *Opcode { // lowers code to an opcode chain terminated by an end op and returns its first opcode
+	codes := code.ToOpcode(ctx)
+	codes.Last().Next = newEndOp(ctx, typ) // terminate the chain before recursive references are linked
+	c.linkRecursiveCode(ctx)
+	return codes.First()
+}
+
+// linkRecursiveCode fills in the Jmp target of every opcode recorded in ctx.recursiveCodes; a type seen twice reuses the first result.
+func (c *Compiler) linkRecursiveCode(ctx *compileContext) {
+	recursiveCodes := map[uintptr]*CompiledCode{} // per-call memo: type address -> CompiledCode linked during this call
+	for _, recursive := range *ctx.recursiveCodes {
+		typeptr := uintptr(unsafe.Pointer(recursive.Type))
+		codes := ctx.structTypeToCodes[typeptr]
+		if recursiveCode, ok := recursiveCodes[typeptr]; ok {
+			*recursive.Jmp = *recursiveCode // copy the already linked CompiledCode by value
+			continue
+		}
+		code := copyOpcode(codes.First())
+		code.Op = code.Op.PtrHeadToHead()
+		lastCode := newEndOp(&compileContext{}, recursive.Type)
+		lastCode.Op = OpRecursiveEnd
+
+		// OpRecursiveEnd must set before call TotalLength
+		code.End.Next = lastCode
+
+		totalLength := code.TotalLength()
+
+		// Idx, ElemIdx, Length must set after call TotalLength
+		lastCode.Idx = uint32((totalLength + 1) * uintptrSize)
+		lastCode.ElemIdx = lastCode.Idx + uintptrSize
+		lastCode.Length = lastCode.Idx + 2*uintptrSize
+
+		// extend length to alloc slot for elemIdx + length
+		curTotalLength := uintptr(recursive.TotalLength()) + 3
+		nextTotalLength := uintptr(totalLength) + 3
+
+		compiled := recursive.Jmp // filled in place, so the opcode sees the result through its existing Jmp pointer
+		compiled.Code = code
+		compiled.CurLen = curTotalLength
+		compiled.NextLen = nextTotalLength
+		compiled.Linked = true
+
+		recursiveCodes[typeptr] = compiled
+	}
+}
diff --git a/vendor/github.com/goccy/go-json/internal/encoder/compiler_norace.go b/vendor/github.com/goccy/go-json/internal/encoder/compiler_norace.go
new file mode 100644
index 00000000..20c93cbf
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/internal/encoder/compiler_norace.go
@@ -0,0 +1,32 @@
+//go:build !race
+// +build !race
+
+package encoder
+
+func CompileToGetCodeSet(ctx *RuntimeContext, typeptr uintptr) (*OpcodeSet, error) {
+	if typeptr < typeAddr.BaseTypeAddr || typeptr > typeAddr.MaxTypeAddr {
+		set, err := compileToGetCodeSetSlowPath(typeptr) // address lies outside the indexed cache range
+		if err != nil {
+			return nil, err
+		}
+		return getFilteredCodeSetIfNeeded(ctx, set)
+	}
+	slot := (typeptr - typeAddr.BaseTypeAddr) >> typeAddr.AddrShift
+	if cached := cachedOpcodeSets[slot]; cached != nil {
+		view, err := getFilteredCodeSetIfNeeded(ctx, cached)
+		if err != nil {
+			return nil, err
+		}
+		return view, nil
+	}
+	compiled, err := newCompiler().compile(typeptr)
+	if err != nil {
+		return nil, err
+	}
+	view, err := getFilteredCodeSetIfNeeded(ctx, compiled)
+	if err != nil {
+		return nil, err
+	}
+	cachedOpcodeSets[slot] = compiled // stored only after filtering succeeded
+	return view, nil
+}
diff --git a/vendor/github.com/goccy/go-json/internal/encoder/compiler_race.go b/vendor/github.com/goccy/go-json/internal/encoder/compiler_race.go
new file mode 100644
index 00000000..13ba23fd
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/internal/encoder/compiler_race.go
@@ -0,0 +1,45 @@
+//go:build race
+// +build race
+
+package encoder
+
+import (
+	"sync"
+)
+
+var setsMu sync.RWMutex // guards cachedOpcodeSets in this race-tagged build
+
+func CompileToGetCodeSet(ctx *RuntimeContext, typeptr uintptr) (*OpcodeSet, error) {
+	if typeptr < typeAddr.BaseTypeAddr || typeptr > typeAddr.MaxTypeAddr {
+		set, err := compileToGetCodeSetSlowPath(typeptr)
+		if err != nil {
+			return nil, err
+		}
+		return getFilteredCodeSetIfNeeded(ctx, set)
+	}
+	slot := (typeptr - typeAddr.BaseTypeAddr) >> typeAddr.AddrShift
+	setsMu.RLock()
+	if cached := cachedOpcodeSets[slot]; cached != nil {
+		// The read lock is held across filtering and dropped before returning.
+		view, err := getFilteredCodeSetIfNeeded(ctx, cached)
+		setsMu.RUnlock()
+		if err != nil {
+			return nil, err
+		}
+		return view, nil
+	}
+	setsMu.RUnlock()
+
+	compiled, err := newCompiler().compile(typeptr)
+	if err != nil {
+		return nil, err
+	}
+	view, err := getFilteredCodeSetIfNeeded(ctx, compiled)
+	if err != nil {
+		return nil, err
+	}
+	setsMu.Lock()
+	cachedOpcodeSets[slot] = compiled
+	setsMu.Unlock()
+	return view, nil
+}
diff --git a/vendor/github.com/goccy/go-json/internal/encoder/context.go b/vendor/github.com/goccy/go-json/internal/encoder/context.go
new file mode 100644
index 00000000..3833d0c8
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/internal/encoder/context.go
@@ -0,0 +1,105 @@
+package encoder
+
+import (
+	"context"
+	"sync"
+	"unsafe"
+
+	"github.com/goccy/go-json/internal/runtime"
+)
+
+type compileContext struct { // bookkeeping carried through opcode compilation
+	opcodeIndex       uint32
+	ptrIndex          int
+	indent            uint32
+	escapeKey         bool
+	structTypeToCodes map[uintptr]Opcodes
+	recursiveCodes    *Opcodes
+}
+
+func (c *compileContext) incIndent() {
+	c.indent++ // one indentation level deeper
+}
+
+func (c *compileContext) decIndent() {
+	c.indent-- // indent is unsigned: callers must pair this with a prior incIndent
+}
+
+func (c *compileContext) incIndex() { // advances the opcode index and the pointer index together
+	c.incOpcodeIndex()
+	c.incPtrIndex()
+}
+
+func (c *compileContext) decIndex() { // steps the opcode index and the pointer index back together
+	c.decOpcodeIndex()
+	c.decPtrIndex()
+}
+
+func (c *compileContext) incOpcodeIndex() {
+	c.opcodeIndex++ // next opcode slot
+}
+
+func (c *compileContext) decOpcodeIndex() {
+	c.opcodeIndex-- // opcodeIndex is unsigned: must not be called at zero
+}
+
+func (c *compileContext) incPtrIndex() {
+	c.ptrIndex++ // next pointer slot
+}
+
+func (c *compileContext) decPtrIndex() {
+	c.ptrIndex-- // ptrIndex is a signed int, so this may go negative
+}
+
+const (
+	bufSize = 1024 // initial capacity of RuntimeContext.Buf for contexts created by the pool
+)
+
+var (
+	runtimeContextPool = sync.Pool{ // backs TakeRuntimeContext / ReleaseRuntimeContext
+		New: func() interface{} {
+			return &RuntimeContext{
+				Buf:      make([]byte, 0, bufSize),
+				Ptrs:     make([]uintptr, 128),
+				KeepRefs: make([]unsafe.Pointer, 0, 8),
+				Option:   &Option{},
+			}
+		},
+	}
+)
+
+type RuntimeContext struct { // reusable encoder state; instances are recycled through runtimeContextPool
+	Context    context.Context
+	Buf        []byte
+	MarshalBuf []byte
+	Ptrs       []uintptr
+	KeepRefs   []unsafe.Pointer
+	SeenPtr    []uintptr
+	BaseIndent uint32
+	Prefix     []byte
+	IndentStr  []byte
+	Option     *Option
+}
+
+func (c *RuntimeContext) Init(p uintptr, codelen int) {
+	// Make sure the pointer table can hold codelen entries before use.
+	if codelen > len(c.Ptrs) {
+		c.Ptrs = make([]uintptr, codelen)
+	}
+	c.Ptrs[0] = p
+	c.KeepRefs, c.SeenPtr = c.KeepRefs[:0], c.SeenPtr[:0]
+	c.BaseIndent = 0
+}
+
+func (c *RuntimeContext) Ptr() uintptr {
+	header := (*runtime.SliceHeader)(unsafe.Pointer(&c.Ptrs)) // view the Ptrs slice header to reach its data pointer
+	return uintptr(header.Data)
+}
+
+func TakeRuntimeContext() *RuntimeContext {
+	return runtimeContextPool.Get().(*RuntimeContext) // pooled values are not reset here; see Init
+}
+
+func ReleaseRuntimeContext(ctx *RuntimeContext) {
+	runtimeContextPool.Put(ctx) // ctx must not be used by the caller after this
+}
diff --git a/vendor/github.com/goccy/go-json/internal/encoder/decode_rune.go b/vendor/github.com/goccy/go-json/internal/encoder/decode_rune.go
new file mode 100644
index 00000000..35c959d4
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/internal/encoder/decode_rune.go
@@ -0,0 +1,126 @@
+package encoder
+
+import "unicode/utf8"
+
+const (
+	// The default lowest and highest continuation byte.
+	locb = 128 //0b10000000
+	hicb = 191 //0b10111111
+
+	// The names of these constants are chosen to give nice alignment in the
+	// table below. The first nibble is an index into acceptRanges or F for
+	// special one-byte cases. The second nibble is the Rune length or the
+	// Status for the special one-byte case.
+	xx = 0xF1 // invalid: size 1
+	as = 0xF0 // ASCII: size 1
+	s1 = 0x02 // accept 0, size 2
+	s2 = 0x13 // accept 1, size 3
+	s3 = 0x03 // accept 0, size 3
+	s4 = 0x23 // accept 2, size 3
+	s5 = 0x34 // accept 3, size 4
+	s6 = 0x04 // accept 0, size 4
+	s7 = 0x44 // accept 4, size 4
+)
+
+// first maps every possible first byte of a UTF-8 sequence to its packed info byte (as, xx or s1..s7).
+var first = [256]uint8{
+	//   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
+	as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x00-0x0F
+	as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x10-0x1F
+	as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x20-0x2F
+	as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x30-0x3F
+	as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x40-0x4F
+	as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x50-0x5F
+	as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x60-0x6F
+	as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, as, // 0x70-0x7F
+	//   1   2   3   4   5   6   7   8   9   A   B   C   D   E   F
+	xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0x80-0x8F
+	xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0x90-0x9F
+	xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0xA0-0xAF
+	xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0xB0-0xBF
+	xx, xx, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, // 0xC0-0xCF
+	s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, s1, // 0xD0-0xDF
+	s2, s3, s3, s3, s3, s3, s3, s3, s3, s3, s3, s3, s3, s4, s3, s3, // 0xE0-0xEF
+	s5, s6, s6, s6, s7, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, xx, // 0xF0-0xFF
+}
+
+const (
+	lineSep      = byte(168) // third byte of '\u2028' in UTF-8 (226, 128, 168)
+	paragraphSep = byte(169) // third byte of '\u2029' in UTF-8 (226, 128, 169)
+)
+
+type decodeRuneState int // classification returned by decodeRuneInString
+
+const (
+	validUTF8State decodeRuneState = iota
+	runeErrorState
+	lineSepState
+	paragraphSepState
+)
+
+func decodeRuneInString(s string) (decodeRuneState, int) { // classifies the first rune of s and returns its byte width; s must be non-empty (s[0] is read unconditionally)
+	n := len(s)
+	s0 := s[0]
+	x := first[s0]
+	if x >= as {
+		// The following code simulates an additional check for x == xx and
+		// handling the ASCII and invalid cases accordingly. This mask-and-or
+		// approach prevents an additional branch.
+		mask := rune(x) << 31 >> 31 // Create 0x0000 or 0xFFFF.
+		if rune(s[0])&^mask|utf8.RuneError&mask == utf8.RuneError {
+			return runeErrorState, 1
+		}
+		return validUTF8State, 1
+	}
+	sz := int(x & 7) // low bits of the info byte hold the sequence length
+	if n < sz {
+		return runeErrorState, 1
+	}
+	s1 := s[1]
+	switch x >> 4 { // high nibble selects the allowed range for the second byte
+	case 0:
+		if s1 < locb || hicb < s1 {
+			return runeErrorState, 1
+		}
+	case 1:
+		if s1 < 0xA0 || hicb < s1 {
+			return runeErrorState, 1
+		}
+	case 2:
+		if s1 < locb || 0x9F < s1 {
+			return runeErrorState, 1
+		}
+	case 3:
+		if s1 < 0x90 || hicb < s1 {
+			return runeErrorState, 1
+		}
+	case 4:
+		if s1 < locb || 0x8F < s1 {
+			return runeErrorState, 1
+		}
+	}
+	if sz <= 2 {
+		return validUTF8State, 2
+	}
+	s2 := s[2]
+	if s2 < locb || hicb < s2 {
+		return runeErrorState, 1
+	}
+	if sz <= 3 {
+		// separator character prefixes: [2]byte{226, 128}
+		if s0 == 226 && s1 == 128 {
+			switch s2 { // only the third byte tells U+2028 and U+2029 apart
+			case lineSep:
+				return lineSepState, 3
+			case paragraphSep:
+				return paragraphSepState, 3
+			}
+		}
+		return validUTF8State, 3
+	}
+	s3 := s[3]
+	if s3 < locb || hicb < s3 {
+		return runeErrorState, 1
+	}
+	return validUTF8State, 4
+}
diff --git a/vendor/github.com/goccy/go-json/internal/encoder/encoder.go b/vendor/github.com/goccy/go-json/internal/encoder/encoder.go
new file mode 100644
index 00000000..14eb6a0d
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/internal/encoder/encoder.go
@@ -0,0 +1,596 @@
+package encoder
+
+import (
+	"bytes"
+	"encoding"
+	"encoding/base64"
+	"encoding/json"
+	"fmt"
+	"math"
+	"reflect"
+	"strconv"
+	"strings"
+	"sync"
+	"unsafe"
+
+	"github.com/goccy/go-json/internal/errors"
+	"github.com/goccy/go-json/internal/runtime"
+)
+
+func (t OpType) IsMultipleOpHead() bool { // reports whether t is one of the struct-head op types listed below
+	switch t {
+	case OpStructHead:
+		return true
+	case OpStructHeadSlice:
+		return true
+	case OpStructHeadArray:
+		return true
+	case OpStructHeadMap:
+		return true
+	case OpStructHeadStruct:
+		return true
+	case OpStructHeadOmitEmpty:
+		return true
+	case OpStructHeadOmitEmptySlice:
+		return true
+	case OpStructHeadOmitEmptyArray:
+		return true
+	case OpStructHeadOmitEmptyMap:
+		return true
+	case OpStructHeadOmitEmptyStruct:
+		return true
+	case OpStructHeadSlicePtr:
+		return true
+	case OpStructHeadOmitEmptySlicePtr:
+		return true
+	case OpStructHeadArrayPtr:
+		return true
+	case OpStructHeadOmitEmptyArrayPtr:
+		return true
+	case OpStructHeadMapPtr:
+		return true
+	case OpStructHeadOmitEmptyMapPtr:
+		return true
+	}
+	return false
+}
+
+func (t OpType) IsMultipleOpField() bool { // reports whether t is one of the struct-field op types listed below
+	switch t {
+	case OpStructField:
+		return true
+	case OpStructFieldSlice:
+		return true
+	case OpStructFieldArray:
+		return true
+	case OpStructFieldMap:
+		return true
+	case OpStructFieldStruct:
+		return true
+	case OpStructFieldOmitEmpty:
+		return true
+	case OpStructFieldOmitEmptySlice:
+		return true
+	case OpStructFieldOmitEmptyArray:
+		return true
+	case OpStructFieldOmitEmptyMap:
+		return true
+	case OpStructFieldOmitEmptyStruct:
+		return true
+	case OpStructFieldSlicePtr:
+		return true
+	case OpStructFieldOmitEmptySlicePtr:
+		return true
+	case OpStructFieldArrayPtr:
+		return true
+	case OpStructFieldOmitEmptyArrayPtr:
+		return true
+	case OpStructFieldMapPtr:
+		return true
+	case OpStructFieldOmitEmptyMapPtr:
+		return true
+	}
+	return false
+}
+
+type OpcodeSet struct { // opcode entry points for one Type, plus a QueryCache guarded by cacheMu
+	Type                     *runtime.Type
+	NoescapeKeyCode          *Opcode
+	EscapeKeyCode            *Opcode
+	InterfaceNoescapeKeyCode *Opcode
+	InterfaceEscapeKeyCode   *Opcode
+	CodeLength               int
+	EndCode                  *Opcode
+	Code                     Code
+	QueryCache               map[string]*OpcodeSet
+	cacheMu                  sync.RWMutex
+}
+
+func (s *OpcodeSet) getQueryCache(hash string) *OpcodeSet {
+	// Read-locked lookup; a hash that is not cached yields nil.
+	s.cacheMu.RLock()
+	defer s.cacheMu.RUnlock()
+	return s.QueryCache[hash]
+}
+
+func (s *OpcodeSet) setQueryCache(hash string, codeSet *OpcodeSet) {
+	s.cacheMu.Lock()
+	s.QueryCache[hash] = codeSet // QueryCache must already be non-nil: writing to a nil map panics
+	s.cacheMu.Unlock()
+}
+
+type CompiledCode struct {
+	Code    *Opcode
+	Linked  bool // whether the recursive code has already been linked
+	CurLen  uintptr
+	NextLen uintptr
+}
+
+const StartDetectingCyclesAfter = 1000 // NOTE(review): presumably the depth after which cycle detection starts; confirm at the use site
+
+func Load(base uintptr, idx uintptr) uintptr {
+	addr := base + idx // idx is added as a byte offset
+	return **(**uintptr)(unsafe.Pointer(&addr))
+}
+
+func Store(base uintptr, idx uintptr, p uintptr) {
+	addr := base + idx // idx is added as a byte offset
+	**(**uintptr)(unsafe.Pointer(&addr)) = p
+}
+
+func LoadNPtr(base uintptr, idx uintptr, ptrNum int) uintptr { // NOTE: ptrNum is ignored; exactly one dereference is performed
+	addr := base + idx
+	p := **(**uintptr)(unsafe.Pointer(&addr))
+	if p == 0 {
+		return 0
+	}
+	return PtrToPtr(p)
+	/*
+		for i := 0; i < ptrNum; i++ {
+			if p == 0 {
+				return p
+			}
+			p = PtrToPtr(p)
+		}
+		return p
+	*/
+}
+
+func PtrToUint64(p uintptr) uint64              { return **(**uint64)(unsafe.Pointer(&p)) }
+func PtrToFloat32(p uintptr) float32            { return **(**float32)(unsafe.Pointer(&p)) }
+func PtrToFloat64(p uintptr) float64            { return **(**float64)(unsafe.Pointer(&p)) }
+func PtrToBool(p uintptr) bool                  { return **(**bool)(unsafe.Pointer(&p)) }
+func PtrToBytes(p uintptr) []byte               { return **(**[]byte)(unsafe.Pointer(&p)) }
+func PtrToNumber(p uintptr) json.Number         { return **(**json.Number)(unsafe.Pointer(&p)) }
+func PtrToString(p uintptr) string              { return **(**string)(unsafe.Pointer(&p)) }
+func PtrToSlice(p uintptr) *runtime.SliceHeader { return *(**runtime.SliceHeader)(unsafe.Pointer(&p)) } // unlike the others, returns p as a header pointer and loads no value
+func PtrToPtr(p uintptr) uintptr {
+	return uintptr(**(**unsafe.Pointer)(unsafe.Pointer(&p))) // load the pointer stored at address p
+}
+func PtrToNPtr(p uintptr, ptrNum int) uintptr {
+	// Dereference p up to ptrNum times. The walk stops as soon as a zero
+	// pointer is met, so any nil link in the chain yields 0, and a
+	// non-positive ptrNum returns p unchanged.
+	for hops := 0; hops < ptrNum && p != 0; hops++ {
+		p = PtrToPtr(p)
+	}
+	return p
+}
+
+func PtrToUnsafePtr(p uintptr) unsafe.Pointer {
+	return *(*unsafe.Pointer)(unsafe.Pointer(&p)) // reinterpret the bits of p; nothing is dereferenced
+}
+func PtrToInterface(code *Opcode, p uintptr) interface{} { // builds an interface{} from code.Type and the data pointer p
+	return *(*interface{})(unsafe.Pointer(&emptyInterface{
+		typ: code.Type,
+		ptr: *(*unsafe.Pointer)(unsafe.Pointer(&p)),
+	}))
+}
+
+func ErrUnsupportedValue(code *Opcode, ptr uintptr) *errors.UnsupportedValueError { // builds the "encountered a cycle" error for the value at ptr
+	v := *(*interface{})(unsafe.Pointer(&emptyInterface{
+		typ: code.Type,
+		ptr: *(*unsafe.Pointer)(unsafe.Pointer(&ptr)),
+	}))
+	return &errors.UnsupportedValueError{
+		Value: reflect.ValueOf(v),
+		Str:   fmt.Sprintf("encountered a cycle via %s", code.Type),
+	}
+}
+
+func ErrUnsupportedFloat(v float64) *errors.UnsupportedValueError { // Str is v in shortest 'g' format
+	return &errors.UnsupportedValueError{
+		Value: reflect.ValueOf(v),
+		Str:   strconv.FormatFloat(v, 'g', -1, 64),
+	}
+}
+
+func ErrMarshalerWithCode(code *Opcode, err error) *errors.MarshalerError { // wraps err together with the type taken from code.Type
+	return &errors.MarshalerError{
+		Type: runtime.RType2Type(code.Type),
+		Err:  err,
+	}
+}
+
+type emptyInterface struct { // two-word (type, data) pair that this file casts to interface{}
+	typ *runtime.Type
+	ptr unsafe.Pointer
+}
+
+type MapItem struct { // one key/value pair as raw bytes; Mapslice orders items by Key
+	Key   []byte
+	Value []byte
+}
+
+type Mapslice struct { // sortable list of MapItem (see Len, Less and Swap)
+	Items []MapItem
+}
+
+func (m *Mapslice) Len() int {
+	return len(m.Items) // number of items
+}
+
+func (m *Mapslice) Less(i, j int) bool {
+	return bytes.Compare(m.Items[i].Key, m.Items[j].Key) < 0 // byte-wise lexicographic order of the keys
+}
+
+func (m *Mapslice) Swap(i, j int) {
+	m.Items[i], m.Items[j] = m.Items[j], m.Items[i] // exchange items i and j
+}
+
+//nolint:structcheck,unused
+type mapIter struct { // NOTE(review): presumably mirrors the Go runtime's map iterator layout, since it is passed to the linknamed iterator functions; confirm per Go version
+	key         unsafe.Pointer
+	elem        unsafe.Pointer
+	t           unsafe.Pointer
+	h           unsafe.Pointer
+	buckets     unsafe.Pointer
+	bptr        unsafe.Pointer
+	overflow    unsafe.Pointer
+	oldoverflow unsafe.Pointer
+	startBucket uintptr
+	offset      uint8
+	wrapped     bool
+	B           uint8
+	i           uint8
+	bucket      uintptr
+	checkBucket uintptr
+}
+
+type MapContext struct { // pooled scratch state handed out by NewMapContext
+	Start int
+	First int
+	Idx   int
+	Slice *Mapslice
+	Buf   []byte
+	Len   int
+	Iter  mapIter
+}
+
+var mapContextPool = sync.Pool{ // backs NewMapContext / ReleaseMapContext
+	New: func() interface{} {
+		return &MapContext{
+			Slice: &Mapslice{},
+		}
+	},
+}
+
+func NewMapContext(mapLen int, unorderedMap bool) *MapContext {
+	mc := mapContextPool.Get().(*MapContext)
+	// Ordered output needs exactly mapLen sortable items; reuse the pooled
+	// slice when it is already long enough.
+	switch items := mc.Slice.Items; {
+	case unorderedMap:
+	case len(items) < mapLen:
+		mc.Slice.Items = make([]MapItem, mapLen)
+	default:
+		mc.Slice.Items = items[:mapLen]
+	}
+	mc.Buf, mc.Iter = mc.Buf[:0], mapIter{}
+	mc.Idx, mc.Len = 0, mapLen
+	return mc
+}
+
+func ReleaseMapContext(c *MapContext) {
+	mapContextPool.Put(c) // c must not be used by the caller after this
+}
+
+//go:linkname MapIterInit runtime.mapiterinit
+//go:noescape
+func MapIterInit(mapType *runtime.Type, m unsafe.Pointer, it *mapIter) // bodyless: bound to runtime.mapiterinit by the directive above
+
+//go:linkname MapIterKey reflect.mapiterkey
+//go:noescape
+func MapIterKey(it *mapIter) unsafe.Pointer // bodyless: bound to reflect.mapiterkey
+
+//go:linkname MapIterNext reflect.mapiternext
+//go:noescape
+func MapIterNext(it *mapIter) // bodyless: bound to reflect.mapiternext
+
+//go:linkname MapLen reflect.maplen
+//go:noescape
+func MapLen(m unsafe.Pointer) int // bodyless: bound to reflect.maplen
+
+func AppendByteSlice(_ *RuntimeContext, b []byte, src []byte) []byte {
+	if src == nil {
+		return append(b, `null`...)
+	}
+	b = append(b, '"')
+	start, need := len(b), base64.StdEncoding.EncodedLen(len(src))
+	// Encode straight into b's spare capacity when it is large enough;
+	// otherwise fall back to a temporary buffer.
+	var scratch []byte
+	if cap(b)-start > need {
+		scratch = b[start : start+need]
+	} else {
+		scratch = make([]byte, need)
+	}
+	base64.StdEncoding.Encode(scratch, src)
+	return append(append(b, scratch...), '"')
+}
+
+func AppendFloat32(_ *RuntimeContext, b []byte, v float32) []byte {
+	// Fixed notation ('f') is the default. Exponent notation ('e') is used
+	// only when the magnitude is non-zero and either below 1e-6 or at least
+	// 1e21. The cutoffs are compared as float32 so they are exact for the
+	// underlying float32 value. The digits emitted are the shortest that
+	// round-trip at 32-bit precision.
+	wide := float64(v)
+	format := byte('f')
+	if mag := float32(math.Abs(wide)); mag != 0 && (mag < 1e-6 || mag >= 1e21) {
+		format = 'e'
+	}
+	return strconv.AppendFloat(b, wide, format, -1, 32)
+}
+
+func AppendFloat64(_ *RuntimeContext, b []byte, v float64) []byte {
+	abs := math.Abs(v)
+	fmt := byte('f')
+	// Exponent notation is used only for non-zero magnitudes below 1e-6 or at least 1e21.
+	if abs != 0 {
+		if abs < 1e-6 || abs >= 1e21 {
+			fmt = 'e'
+		}
+	}
+	return strconv.AppendFloat(b, v, fmt, -1, 64)
+}
+
+func AppendBool(_ *RuntimeContext, b []byte, v bool) []byte {
+	literal := "false" // JSON spelling of v
+	if v {
+		literal = "true"
+	}
+	return append(b, literal...)
+}
+
+var (
+	floatTable = [256]bool{ // bytes AppendNumber accepts inside a json.Number
+		'0': true,
+		'1': true,
+		'2': true,
+		'3': true,
+		'4': true,
+		'5': true,
+		'6': true,
+		'7': true,
+		'8': true,
+		'9': true,
+		'.': true,
+		'e': true,
+		'E': true,
+		'+': true,
+		'-': true,
+	}
+)
+
+func AppendNumber(_ *RuntimeContext, b []byte, n json.Number) ([]byte, error) {
+	if n == "" {
+		return append(b, '0'), nil // an empty Number is written as 0
+	}
+	// Only a per-byte whitelist is applied; the number grammar is not checked.
+	for _, ch := range []byte(n) {
+		if !floatTable[ch] {
+			return nil, fmt.Errorf("json: invalid number literal %q", n)
+		}
+	}
+	return append(b, n...), nil
+}
+
+func AppendMarshalJSON(ctx *RuntimeContext, code *Opcode, b []byte, v interface{}) ([]byte, error) {
+	rv := reflect.ValueOf(v) // convert by dynamic interface type
+	if (code.Flags & AddrForMarshalerFlags) != 0 {
+		if rv.CanAddr() {
+			rv = rv.Addr()
+		} else {
+			newV := reflect.New(rv.Type()) // not addressable: copy the value behind a fresh pointer
+			newV.Elem().Set(rv)
+			rv = newV
+		}
+	}
+	v = rv.Interface()
+	var bb []byte
+	if (code.Flags & MarshalerContextFlags) != 0 {
+		marshaler, ok := v.(marshalerContext)
+		if !ok {
+			return AppendNull(ctx, b), nil
+		}
+		stdctx := ctx.Option.Context
+		if ctx.Option.Flag&FieldQueryOption != 0 {
+			stdctx = SetFieldQueryToContext(stdctx, code.FieldQuery)
+		}
+		b, err := marshaler.MarshalJSON(stdctx) // note: this b shadows the output buffer inside the branch
+		if err != nil {
+			return nil, &errors.MarshalerError{Type: reflect.TypeOf(v), Err: err}
+		}
+		bb = b
+	} else {
+		marshaler, ok := v.(json.Marshaler)
+		if !ok {
+			return AppendNull(ctx, b), nil
+		}
+		b, err := marshaler.MarshalJSON() // note: this b shadows the output buffer inside the branch
+		if err != nil {
+			return nil, &errors.MarshalerError{Type: reflect.TypeOf(v), Err: err}
+		}
+		bb = b
+	}
+	marshalBuf := ctx.MarshalBuf[:0]
+	marshalBuf = append(append(marshalBuf, bb...), nul) // marshaler output plus the nul terminator
+	compactedBuf, err := compact(b, marshalBuf, (ctx.Option.Flag&HTMLEscapeOption) != 0)
+	if err != nil {
+		return nil, &errors.MarshalerError{Type: reflect.TypeOf(v), Err: err}
+	}
+	ctx.MarshalBuf = marshalBuf // keep the scratch buffer for reuse
+	return compactedBuf, nil
+}
+
+func AppendMarshalJSONIndent(ctx *RuntimeContext, code *Opcode, b []byte, v interface{}) ([]byte, error) {
+	rv := reflect.ValueOf(v) // convert by dynamic interface type
+	if (code.Flags & AddrForMarshalerFlags) != 0 {
+		if rv.CanAddr() {
+			rv = rv.Addr()
+		} else {
+			newV := reflect.New(rv.Type()) // not addressable: copy the value behind a fresh pointer
+			newV.Elem().Set(rv)
+			rv = newV
+		}
+	}
+	v = rv.Interface()
+	var bb []byte
+	if (code.Flags & MarshalerContextFlags) != 0 {
+		marshaler, ok := v.(marshalerContext)
+		if !ok {
+			return AppendNull(ctx, b), nil
+		}
+		b, err := marshaler.MarshalJSON(ctx.Option.Context) // NOTE(review): unlike AppendMarshalJSON, no field query is attached to the context here
+		if err != nil {
+			return nil, &errors.MarshalerError{Type: reflect.TypeOf(v), Err: err}
+		}
+		bb = b
+	} else {
+		marshaler, ok := v.(json.Marshaler)
+		if !ok {
+			return AppendNull(ctx, b), nil
+		}
+		b, err := marshaler.MarshalJSON() // note: this b shadows the output buffer inside the branch
+		if err != nil {
+			return nil, &errors.MarshalerError{Type: reflect.TypeOf(v), Err: err}
+		}
+		bb = b
+	}
+	marshalBuf := ctx.MarshalBuf[:0]
+	marshalBuf = append(append(marshalBuf, bb...), nul) // marshaler output plus the nul terminator
+	indentedBuf, err := doIndent(
+		b,
+		marshalBuf,
+		string(ctx.Prefix)+strings.Repeat(string(ctx.IndentStr), int(ctx.BaseIndent+code.Indent)),
+		string(ctx.IndentStr),
+		(ctx.Option.Flag&HTMLEscapeOption) != 0,
+	)
+	if err != nil {
+		return nil, &errors.MarshalerError{Type: reflect.TypeOf(v), Err: err}
+	}
+	ctx.MarshalBuf = marshalBuf // keep the scratch buffer for reuse
+	return indentedBuf, nil
+}
+
+func AppendMarshalText(ctx *RuntimeContext, code *Opcode, b []byte, v interface{}) ([]byte, error) {
+	rv := reflect.ValueOf(v) // convert by dynamic interface type
+	if (code.Flags & AddrForMarshalerFlags) != 0 {
+		if rv.CanAddr() {
+			rv = rv.Addr()
+		} else {
+			newV := reflect.New(rv.Type()) // not addressable: copy the value behind a fresh pointer
+			newV.Elem().Set(rv)
+			rv = newV
+		}
+	}
+	v = rv.Interface()
+	marshaler, ok := v.(encoding.TextMarshaler)
+	if !ok {
+		return AppendNull(ctx, b), nil
+	}
+	bytes, err := marshaler.MarshalText() // note: this local shadows the bytes package
+	if err != nil {
+		return nil, &errors.MarshalerError{Type: reflect.TypeOf(v), Err: err}
+	}
+	return AppendString(ctx, b, *(*string)(unsafe.Pointer(&bytes))), nil // view the text as a string without copying
+}
+
+func AppendMarshalTextIndent(ctx *RuntimeContext, code *Opcode, b []byte, v interface{}) ([]byte, error) {
+	rv := reflect.ValueOf(v) // convert by dynamic interface type
+	if (code.Flags & AddrForMarshalerFlags) != 0 {
+		if rv.CanAddr() {
+			rv = rv.Addr()
+		} else {
+			newV := reflect.New(rv.Type()) // not addressable: copy the value behind a fresh pointer
+			newV.Elem().Set(rv)
+			rv = newV
+		}
+	}
+	v = rv.Interface()
+	marshaler, ok := v.(encoding.TextMarshaler)
+	if !ok {
+		return AppendNull(ctx, b), nil
+	}
+	bytes, err := marshaler.MarshalText() // note: this local shadows the bytes package
+	if err != nil {
+		return nil, &errors.MarshalerError{Type: reflect.TypeOf(v), Err: err}
+	}
+	return AppendString(ctx, b, *(*string)(unsafe.Pointer(&bytes))), nil // same statements as AppendMarshalText; no indentation is added
+}
+
+func AppendNull(_ *RuntimeContext, b []byte) []byte {
+	return append(b, "null"...) // the JSON null literal
+}
+
+func AppendComma(_ *RuntimeContext, b []byte) []byte {
+	return append(b, ',') // separator only, no whitespace
+}
+
+func AppendCommaIndent(_ *RuntimeContext, b []byte) []byte {
+	return append(b, ',', '\n') // separator followed by a line break
+}
+
+func AppendStructEnd(_ *RuntimeContext, b []byte) []byte {
+	return append(b, '}', ',') // closes the object and always adds a trailing comma
+}
+
+func AppendStructEndIndent(ctx *RuntimeContext, code *Opcode, b []byte) []byte {
+	b = append(append(b, '\n'), ctx.Prefix...)
+	// The closing brace is indented one level less than BaseIndent plus
+	// code.Indent.
+	for level := ctx.BaseIndent + code.Indent - 1; level > 0; level-- {
+		b = append(b, ctx.IndentStr...)
+	}
+	return append(b, '}', ',', '\n')
+}
+
+func AppendIndent(ctx *RuntimeContext, b []byte, indent uint32) []byte {
+	// Emit the prefix once, then one IndentStr per indentation level.
+	b = append(b, ctx.Prefix...)
+	for level := ctx.BaseIndent + indent; level > 0; level-- {
+		b = append(b, ctx.IndentStr...)
+	}
+	return b
+}
+
+func IsNilForMarshaler(v interface{}) bool {
+	switch val := reflect.ValueOf(v); val.Kind() {
+	case reflect.Bool:
+		return !val.Bool()
+	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
+		return val.Int() == 0
+	case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr:
+		return val.Uint() == 0
+	case reflect.Float32, reflect.Float64:
+		return math.Float64bits(val.Float()) == 0 // bit test: only +0 counts as empty
+	case reflect.Interface, reflect.Map, reflect.Ptr, reflect.Func:
+		return val.IsNil()
+	case reflect.Slice:
+		return val.IsNil() || val.Len() == 0
+	case reflect.String:
+		return val.Len() == 0
+	default:
+		return false
+	}
+}
diff --git a/vendor/github.com/goccy/go-json/internal/encoder/indent.go b/vendor/github.com/goccy/go-json/internal/encoder/indent.go
new file mode 100644
index 00000000..dfe04b5e
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/internal/encoder/indent.go
@@ -0,0 +1,211 @@
+package encoder
+
+import (
+	"bytes"
+	"fmt"
+
+	"github.com/goccy/go-json/internal/errors"
+)
+
+func takeIndentSrcRuntimeContext(src []byte) (*RuntimeContext, []byte) {
+	// Borrow a pooled context and fill its buffer with src followed by the
+	// nul terminator; the same slice is stored on the context and returned.
+	rctx := TakeRuntimeContext()
+	rctx.Buf = append(append(rctx.Buf[:0], src...), nul)
+	return rctx, rctx.Buf
+}
+
+func Indent(buf *bytes.Buffer, src []byte, prefix, indentStr string) error {
+	if len(src) == 0 {
+		return errors.ErrUnexpectedEndOfJSON("", 0)
+	}
+
+	// Both the nul-terminated copy of src and the output scratch buffer come
+	// from pooled runtime contexts.
+	srcCtx, srcBuf := takeIndentSrcRuntimeContext(src)
+	dstCtx := TakeRuntimeContext()
+
+	out, err := indentAndWrite(buf, dstCtx.Buf[:0], srcBuf, prefix, indentStr)
+	if err == nil {
+		dstCtx.Buf = out // keep the (possibly grown) buffer for reuse
+	}
+
+	// The contexts go back to the pool on success and on failure alike.
+	ReleaseRuntimeContext(srcCtx)
+	ReleaseRuntimeContext(dstCtx)
+	return err
+}
+
+func indentAndWrite(buf *bytes.Buffer, dst []byte, src []byte, prefix, indentStr string) ([]byte, error) {
+	out, err := doIndent(dst, src, prefix, indentStr, false) // escape is off on this path
+	if err != nil {
+		return nil, err
+	}
+	if _, writeErr := buf.Write(out); writeErr != nil {
+		return nil, writeErr
+	}
+	return out, nil
+}
+
+func doIndent(dst, src []byte, prefix, indentStr string, escape bool) ([]byte, error) {
+	out, end, err := indentValue(dst, src, 0, 0, []byte(prefix), []byte(indentStr), escape)
+	if err != nil {
+		return nil, err
+	}
+	if tailErr := validateEndBuf(src, end); tailErr != nil { // validate src from where the value ended
+		return nil, tailErr
+	}
+	return out, nil
+}
+
+func indentValue( // dispatches on the first non-whitespace byte at cursor; returns the grown dst and the cursor after the value
+	dst []byte,
+	src []byte,
+	indentNum int,
+	cursor int64,
+	prefix []byte,
+	indentBytes []byte,
+	escape bool) ([]byte, int64, error) {
+	for {
+		switch src[cursor] {
+		case ' ', '\t', '\n', '\r':
+			cursor++ // skip leading whitespace
+			continue
+		case '{':
+			return indentObject(dst, src, indentNum, cursor, prefix, indentBytes, escape)
+		case '}':
+			return nil, 0, errors.ErrSyntax("unexpected character '}'", cursor)
+		case '[':
+			return indentArray(dst, src, indentNum, cursor, prefix, indentBytes, escape)
+		case ']':
+			return nil, 0, errors.ErrSyntax("unexpected character ']'", cursor)
+		case '"':
+			return compactString(dst, src, cursor, escape)
+		case '-', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9':
+			return compactNumber(dst, src, cursor)
+		case 't':
+			return compactTrue(dst, src, cursor)
+		case 'f':
+			return compactFalse(dst, src, cursor)
+		case 'n':
+			return compactNull(dst, src, cursor)
+		default:
+			return nil, 0, errors.ErrSyntax(fmt.Sprintf("unexpected character '%c'", src[cursor]), cursor)
+		}
+	}
+}
+
+func indentObject( // re-indents the object starting at src[cursor]; returns the grown dst and the cursor after '}'
+	dst []byte,
+	src []byte,
+	indentNum int,
+	cursor int64,
+	prefix []byte,
+	indentBytes []byte,
+	escape bool) ([]byte, int64, error) {
+	if src[cursor] == '{' {
+		dst = append(dst, '{')
+	} else {
+		return nil, 0, errors.ErrExpected("expected { character for object value", cursor)
+	}
+	cursor = skipWhiteSpace(src, cursor+1)
+	if src[cursor] == '}' { // empty object: written as "{}" with no line break
+		dst = append(dst, '}')
+		return dst, cursor + 1, nil
+	}
+	indentNum++ // members sit one level deeper than the braces
+	var err error
+	for {
+		dst = append(append(dst, '\n'), prefix...)
+		for i := 0; i < indentNum; i++ {
+			dst = append(dst, indentBytes...)
+		}
+		cursor = skipWhiteSpace(src, cursor)
+		dst, cursor, err = compactString(dst, src, cursor, escape) // the key
+		if err != nil {
+			return nil, 0, err
+		}
+		cursor = skipWhiteSpace(src, cursor)
+		if src[cursor] != ':' {
+			return nil, 0, errors.ErrSyntax(
+				fmt.Sprintf("invalid character '%c' after object key", src[cursor]),
+				cursor+1,
+			)
+		}
+		dst = append(dst, ':', ' ') // key and value are separated by ": "
+		dst, cursor, err = indentValue(dst, src, indentNum, cursor+1, prefix, indentBytes, escape)
+		if err != nil {
+			return nil, 0, err
+		}
+		cursor = skipWhiteSpace(src, cursor)
+		switch src[cursor] {
+		case '}':
+			dst = append(append(dst, '\n'), prefix...)
+			for i := 0; i < indentNum-1; i++ { // the closing brace goes back to the opening level
+				dst = append(dst, indentBytes...)
+			}
+			dst = append(dst, '}')
+			cursor++
+			return dst, cursor, nil
+		case ',':
+			dst = append(dst, ',')
+		default:
+			return nil, 0, errors.ErrSyntax(
+				fmt.Sprintf("invalid character '%c' after object key:value pair", src[cursor]),
+				cursor+1,
+			)
+		}
+		cursor++
+	}
+}
+
+func indentArray( // re-indents the array starting at src[cursor]; returns the grown dst and the cursor after ']'
+	dst []byte,
+	src []byte,
+	indentNum int,
+	cursor int64,
+	prefix []byte,
+	indentBytes []byte,
+	escape bool) ([]byte, int64, error) {
+	if src[cursor] == '[' {
+		dst = append(dst, '[')
+	} else {
+		return nil, 0, errors.ErrExpected("expected [ character for array value", cursor)
+	}
+	cursor = skipWhiteSpace(src, cursor+1)
+	if src[cursor] == ']' { // empty array: written as "[]" with no line break
+		dst = append(dst, ']')
+		return dst, cursor + 1, nil
+	}
+	indentNum++ // elements sit one level deeper than the brackets
+	var err error
+	for {
+		dst = append(append(dst, '\n'), prefix...)
+		for i := 0; i < indentNum; i++ {
+			dst = append(dst, indentBytes...)
+		}
+		dst, cursor, err = indentValue(dst, src, indentNum, cursor, prefix, indentBytes, escape)
+		if err != nil {
+			return nil, 0, err
+		}
+		cursor = skipWhiteSpace(src, cursor)
+		switch src[cursor] {
+		case ']':
+			dst = append(append(dst, '\n'), prefix...)
+			for i := 0; i < indentNum-1; i++ { // the closing bracket goes back to the opening level
+				dst = append(dst, indentBytes...)
+			}
+			dst = append(dst, ']')
+			cursor++
+			return dst, cursor, nil
+		case ',':
+			dst = append(dst, ',')
+		default:
+			return nil, 0, errors.ErrSyntax(
+				fmt.Sprintf("invalid character '%c' after array value", src[cursor]),
+				cursor+1,
+			)
+		}
+		cursor++
+	}
+}
diff --git a/vendor/github.com/goccy/go-json/internal/encoder/int.go b/vendor/github.com/goccy/go-json/internal/encoder/int.go
new file mode 100644
index 00000000..8b5febea
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/internal/encoder/int.go
@@ -0,0 +1,176 @@
+// The processing code in this file is inspired by https://github.com/segmentio/encoding.
+// The license notation is as follows.
+//
+// # MIT License
+//
+// Copyright (c) 2019 Segment.io, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+package encoder
+
+import (
+	"unsafe"
+)
+
+var endianness int // index into intLookup: 0 = little-endian, 1 = big-endian (assigned by init)
+
+func init() { // detects the host byte order from the first byte of a stored uint16
+	var b [2]byte
+	*(*uint16)(unsafe.Pointer(&b)) = uint16(0xABCD)
+
+	switch b[0] {
+	case 0xCD: // low-order byte was stored first
+		endianness = 0 // LE
+	case 0xAB: // high-order byte was stored first
+		endianness = 1 // BE
+	default:
+		panic("could not determine endianness")
+	}
+}
+
+// "00010203...96979899" cast to []uint16
+var intLELookup = [100]uint16{
+	0x3030, 0x3130, 0x3230, 0x3330, 0x3430, 0x3530, 0x3630, 0x3730, 0x3830, 0x3930,
+	0x3031, 0x3131, 0x3231, 0x3331, 0x3431, 0x3531, 0x3631, 0x3731, 0x3831, 0x3931,
+	0x3032, 0x3132, 0x3232, 0x3332, 0x3432, 0x3532, 0x3632, 0x3732, 0x3832, 0x3932,
+	0x3033, 0x3133, 0x3233, 0x3333, 0x3433, 0x3533, 0x3633, 0x3733, 0x3833, 0x3933,
+	0x3034, 0x3134, 0x3234, 0x3334, 0x3434, 0x3534, 0x3634, 0x3734, 0x3834, 0x3934,
+	0x3035, 0x3135, 0x3235, 0x3335, 0x3435, 0x3535, 0x3635, 0x3735, 0x3835, 0x3935,
+	0x3036, 0x3136, 0x3236, 0x3336, 0x3436, 0x3536, 0x3636, 0x3736, 0x3836, 0x3936,
+	0x3037, 0x3137, 0x3237, 0x3337, 0x3437, 0x3537, 0x3637, 0x3737, 0x3837, 0x3937,
+	0x3038, 0x3138, 0x3238, 0x3338, 0x3438, 0x3538, 0x3638, 0x3738, 0x3838, 0x3938,
+	0x3039, 0x3139, 0x3239, 0x3339, 0x3439, 0x3539, 0x3639, 0x3739, 0x3839, 0x3939,
+}
+
+var intBELookup = [100]uint16{
+	0x3030, 0x3031, 0x3032, 0x3033, 0x3034, 0x3035, 0x3036, 0x3037, 0x3038, 0x3039,
+	0x3130, 0x3131, 0x3132, 0x3133, 0x3134, 0x3135, 0x3136, 0x3137, 0x3138, 0x3139,
+	0x3230, 0x3231, 0x3232, 0x3233, 0x3234, 0x3235, 0x3236, 0x3237, 0x3238, 0x3239,
+	0x3330, 0x3331, 0x3332, 0x3333, 0x3334, 0x3335, 0x3336, 0x3337, 0x3338, 0x3339,
+	0x3430, 0x3431, 0x3432, 0x3433, 0x3434, 0x3435, 0x3436, 0x3437, 0x3438, 0x3439,
+	0x3530, 0x3531, 0x3532, 0x3533, 0x3534, 0x3535, 0x3536, 0x3537, 0x3538, 0x3539,
+	0x3630, 0x3631, 0x3632, 0x3633, 0x3634, 0x3635, 0x3636, 0x3637, 0x3638, 0x3639,
+	0x3730, 0x3731, 0x3732, 0x3733, 0x3734, 0x3735, 0x3736, 0x3737, 0x3738, 0x3739,
+	0x3830, 0x3831, 0x3832, 0x3833, 0x3834, 0x3835, 0x3836, 0x3837, 0x3838, 0x3839,
+	0x3930, 0x3931, 0x3932, 0x3933, 0x3934, 0x3935, 0x3936, 0x3937, 0x3938, 0x3939,
+}
+
+var intLookup = [2]*[100]uint16{&intLELookup, &intBELookup} // indexed by endianness: [0] little-endian table, [1] big-endian table
+
+func numMask(numBitSize uint8) uint64 { // returns a mask with the low numBitSize bits set
+	return 1<<numBitSize - 1 // parsed as (1<<numBitSize) - 1; for 64 the shift yields 0 and the subtraction wraps to all ones
+}
+
+func AppendInt(_ *RuntimeContext, out []byte, p uintptr, code *Opcode) []byte { // appends the decimal text of the signed integer addressed by p to out
+	var u64 uint64
+	switch code.NumBitSize { // load exactly NumBitSize bits; p is reinterpreted as a pointer to the value
+	case 8:
+		u64 = (uint64)(**(**uint8)(unsafe.Pointer(&p)))
+	case 16:
+		u64 = (uint64)(**(**uint16)(unsafe.Pointer(&p)))
+	case 32:
+		u64 = (uint64)(**(**uint32)(unsafe.Pointer(&p)))
+	case 64:
+		u64 = **(**uint64)(unsafe.Pointer(&p))
+	}
+	mask := numMask(code.NumBitSize)
+	n := u64 & mask
+	negative := (u64>>(code.NumBitSize-1))&1 == 1 // the top bit of the NumBitSize-wide value is the sign bit
+	if !negative {
+		if n < 10 { // single digit
+			return append(out, byte(n+'0'))
+		} else if n < 100 { // two digits: the low byte of the LE table entry is the tens digit
+			u := intLELookup[n]
+			return append(out, byte(u), byte(u>>8))
+		}
+	} else {
+		n = -n & mask // two's-complement negation gives the magnitude within NumBitSize bits
+	}
+
+	lookup := intLookup[endianness]
+
+	var b [22]byte
+	u := (*[11]uint16)(unsafe.Pointer(&b)) // view the 22-byte buffer as 11 two-digit slots
+	i := 11
+
+	for n >= 100 { // fill slots from the back, two decimal digits at a time
+		j := n % 100
+		n /= 100
+		i--
+		u[i] = lookup[j]
+	}
+
+	i--
+	u[i] = lookup[n] // the remaining leading one or two digits
+
+	i *= 2 // convert to byte index
+	if n < 10 {
+		i++ // remove leading zero
+	}
+	if negative {
+		i-- // make room for the sign in front of the first digit
+		b[i] = '-'
+	}
+
+	return append(out, b[i:]...)
+}
+
+func AppendUint(_ *RuntimeContext, out []byte, p uintptr, code *Opcode) []byte { // appends the decimal text of the unsigned integer addressed by p to out
+	var u64 uint64
+	switch code.NumBitSize { // load exactly NumBitSize bits; p is reinterpreted as a pointer to the value
+	case 8:
+		u64 = (uint64)(**(**uint8)(unsafe.Pointer(&p)))
+	case 16:
+		u64 = (uint64)(**(**uint16)(unsafe.Pointer(&p)))
+	case 32:
+		u64 = (uint64)(**(**uint32)(unsafe.Pointer(&p)))
+	case 64:
+		u64 = **(**uint64)(unsafe.Pointer(&p))
+	}
+	mask := numMask(code.NumBitSize)
+	n := u64 & mask
+	if n < 10 { // single digit
+		return append(out, byte(n+'0'))
+	} else if n < 100 { // two digits: the low byte of the LE table entry is the tens digit
+		u := intLELookup[n]
+		return append(out, byte(u), byte(u>>8))
+	}
+
+	lookup := intLookup[endianness]
+
+	var b [22]byte
+	u := (*[11]uint16)(unsafe.Pointer(&b)) // view the 22-byte buffer as 11 two-digit slots
+	i := 11
+
+	for n >= 100 { // fill slots from the back, two decimal digits at a time
+		j := n % 100
+		n /= 100
+		i--
+		u[i] = lookup[j]
+	}
+
+	i--
+	u[i] = lookup[n] // the remaining leading one or two digits
+
+	i *= 2 // convert to byte index
+	if n < 10 {
+		i++ // remove leading zero
+	}
+	return append(out, b[i:]...)
+}
diff --git a/vendor/github.com/goccy/go-json/internal/encoder/map112.go b/vendor/github.com/goccy/go-json/internal/encoder/map112.go
new file mode 100644
index 00000000..e96ffadf
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/internal/encoder/map112.go
@@ -0,0 +1,9 @@
+//go:build !go1.13
+// +build !go1.13
+
+package encoder
+
+import "unsafe"
+
+//go:linkname MapIterValue reflect.mapitervalue
+func MapIterValue(it *mapIter) unsafe.Pointer // bodyless declaration; the go:linkname directive above binds it to reflect.mapitervalue
diff --git a/vendor/github.com/goccy/go-json/internal/encoder/map113.go b/vendor/github.com/goccy/go-json/internal/encoder/map113.go
new file mode 100644
index 00000000..9b69dcc3
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/internal/encoder/map113.go
@@ -0,0 +1,9 @@
+//go:build go1.13
+// +build go1.13
+
+package encoder
+
+import "unsafe"
+
+//go:linkname MapIterValue reflect.mapiterelem
+func MapIterValue(it *mapIter) unsafe.Pointer // bodyless declaration; the go:linkname directive above binds it to reflect.mapiterelem
diff --git a/vendor/github.com/goccy/go-json/internal/encoder/opcode.go b/vendor/github.com/goccy/go-json/internal/encoder/opcode.go
new file mode 100644
index 00000000..df22f554
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/internal/encoder/opcode.go
@@ -0,0 +1,752 @@
+package encoder
+
+import (
+	"bytes"
+	"fmt"
+	"sort"
+	"strings"
+	"unsafe"
+
+	"github.com/goccy/go-json/internal/runtime"
+)
+
+const uintptrSize = 4 << (^uintptr(0) >> 63) // 8 when uintptr is 64 bits wide, otherwise 4
+
+type OpFlags uint16 // bit set stored in Opcode.Flags; ten of the sixteen bits are assigned below
+
+const (
+	AnonymousHeadFlags     OpFlags = 1 << 0
+	AnonymousKeyFlags      OpFlags = 1 << 1
+	IndirectFlags          OpFlags = 1 << 2
+	IsTaggedKeyFlags       OpFlags = 1 << 3
+	NilCheckFlags          OpFlags = 1 << 4
+	AddrForMarshalerFlags  OpFlags = 1 << 5
+	IsNextOpPtrTypeFlags   OpFlags = 1 << 6
+	IsNilableTypeFlags     OpFlags = 1 << 7
+	MarshalerContextFlags  OpFlags = 1 << 8
+	NonEmptyInterfaceFlags OpFlags = 1 << 9
+)
+
+type Opcode struct {
+	Op         OpType  // operation type
+	Idx        uint32  // offset to access ptr
+	Next       *Opcode // next opcode
+	End        *Opcode // array/slice/struct/map end
+	NextField  *Opcode // next struct field
+	Key        string  // struct field key
+	Offset     uint32  // offset size from struct header
+	PtrNum     uint8   // pointer number: e.g. double pointer is 2.
+	NumBitSize uint8   // integer width in bits; AppendInt/AppendUint handle 8, 16, 32 and 64
+	Flags      OpFlags // bit set of the OpFlags constants
+
+	Type       *runtime.Type // go type
+	Jmp        *CompiledCode // for recursive call
+	FieldQuery *FieldQuery   // field query for Interface / MarshalJSON / MarshalText
+	ElemIdx    uint32        // offset to access array/slice elem
+	Length     uint32        // offset to access slice length or array length
+	Indent     uint32        // indent number
+	Size       uint32        // array/slice elem size
+	DisplayIdx uint32        // opcode index
+	DisplayKey string        // key text to display
+}
+
+func (c *Opcode) Validate() error { // checks that DisplayIdx grows by exactly one along the IterNext chain starting at c
+	var prevIdx uint32
+	for code := c; !code.IsEnd(); {
+		if prevIdx != 0 { // the comparison is skipped while the previous index is still 0
+			if code.DisplayIdx != prevIdx+1 {
+				return fmt.Errorf(
+					"invalid index. previous display index is %d but next is %d. dump = %s",
+					prevIdx, code.DisplayIdx, c.Dump(),
+				)
+			}
+		}
+		prevIdx = code.DisplayIdx
+		code = code.IterNext()
+	}
+	return nil // the terminating opcode itself is not checked
+}
+
+func (c *Opcode) IterNext() *Opcode { // returns the opcode to visit after c in a linear walk; safe on a nil receiver
+	if c == nil {
+		return nil
+	}
+	switch c.Op.CodeType() {
+	case CodeArrayElem, CodeSliceElem, CodeMapKey: // for these code types the walk continues at End rather than Next
+		return c.End
+	default:
+		return c.Next
+	}
+}
+
+func (c *Opcode) IsEnd() bool { // reports whether c terminates a chain; a nil receiver counts as an end
+	if c == nil {
+		return true
+	}
+	return c.Op == OpEnd || c.Op == OpInterfaceEnd || c.Op == OpRecursiveEnd
+}
+
+func (c *Opcode) MaxIdx() uint32 { // returns the largest of c.Idx, c.ElemIdx, c.Length and c.Size
+	max := uint32(0)
+	for _, value := range []uint32{
+		c.Idx,
+		c.ElemIdx,
+		c.Length,
+		c.Size,
+	} {
+		if max < value {
+			max = value
+		}
+	}
+	return max
+}
+
+func (c *Opcode) ToHeaderType(isString bool) OpType { // maps c.Op to its OpStructHead* counterpart; isString picks the *String variant where one exists
+	switch c.Op {
+	case OpInt:
+		if isString {
+			return OpStructHeadIntString
+		}
+		return OpStructHeadInt
+	case OpIntPtr:
+		if isString {
+			return OpStructHeadIntPtrString
+		}
+		return OpStructHeadIntPtr
+	case OpUint:
+		if isString {
+			return OpStructHeadUintString
+		}
+		return OpStructHeadUint
+	case OpUintPtr:
+		if isString {
+			return OpStructHeadUintPtrString
+		}
+		return OpStructHeadUintPtr
+	case OpFloat32:
+		if isString {
+			return OpStructHeadFloat32String
+		}
+		return OpStructHeadFloat32
+	case OpFloat32Ptr:
+		if isString {
+			return OpStructHeadFloat32PtrString
+		}
+		return OpStructHeadFloat32Ptr
+	case OpFloat64:
+		if isString {
+			return OpStructHeadFloat64String
+		}
+		return OpStructHeadFloat64
+	case OpFloat64Ptr:
+		if isString {
+			return OpStructHeadFloat64PtrString
+		}
+		return OpStructHeadFloat64Ptr
+	case OpString:
+		if isString {
+			return OpStructHeadStringString
+		}
+		return OpStructHeadString
+	case OpStringPtr:
+		if isString {
+			return OpStructHeadStringPtrString
+		}
+		return OpStructHeadStringPtr
+	case OpNumber:
+		if isString {
+			return OpStructHeadNumberString
+		}
+		return OpStructHeadNumber
+	case OpNumberPtr:
+		if isString {
+			return OpStructHeadNumberPtrString
+		}
+		return OpStructHeadNumberPtr
+	case OpBool:
+		if isString {
+			return OpStructHeadBoolString
+		}
+		return OpStructHeadBool
+	case OpBoolPtr:
+		if isString {
+			return OpStructHeadBoolPtrString
+		}
+		return OpStructHeadBoolPtr
+	case OpBytes: // from here on isString is ignored
+		return OpStructHeadBytes
+	case OpBytesPtr:
+		return OpStructHeadBytesPtr
+	case OpMap:
+		return OpStructHeadMap
+	case OpMapPtr:
+		c.Op = OpMap // side effect: the receiver's own op is rewritten to the non-pointer form
+		return OpStructHeadMapPtr
+	case OpArray:
+		return OpStructHeadArray
+	case OpArrayPtr:
+		c.Op = OpArray // side effect: the receiver's own op is rewritten to the non-pointer form
+		return OpStructHeadArrayPtr
+	case OpSlice:
+		return OpStructHeadSlice
+	case OpSlicePtr:
+		c.Op = OpSlice // side effect: the receiver's own op is rewritten to the non-pointer form
+		return OpStructHeadSlicePtr
+	case OpMarshalJSON:
+		return OpStructHeadMarshalJSON
+	case OpMarshalJSONPtr:
+		return OpStructHeadMarshalJSONPtr
+	case OpMarshalText:
+		return OpStructHeadMarshalText
+	case OpMarshalTextPtr:
+		return OpStructHeadMarshalTextPtr
+	}
+	return OpStructHead // every op not listed above gets the generic head
+}
+
+func (c *Opcode) ToFieldType(isString bool) OpType { // maps c.Op to its OpStructField* counterpart; isString picks the *String variant where one exists
+	switch c.Op {
+	case OpInt:
+		if isString {
+			return OpStructFieldIntString
+		}
+		return OpStructFieldInt
+	case OpIntPtr:
+		if isString {
+			return OpStructFieldIntPtrString
+		}
+		return OpStructFieldIntPtr
+	case OpUint:
+		if isString {
+			return OpStructFieldUintString
+		}
+		return OpStructFieldUint
+	case OpUintPtr:
+		if isString {
+			return OpStructFieldUintPtrString
+		}
+		return OpStructFieldUintPtr
+	case OpFloat32:
+		if isString {
+			return OpStructFieldFloat32String
+		}
+		return OpStructFieldFloat32
+	case OpFloat32Ptr:
+		if isString {
+			return OpStructFieldFloat32PtrString
+		}
+		return OpStructFieldFloat32Ptr
+	case OpFloat64:
+		if isString {
+			return OpStructFieldFloat64String
+		}
+		return OpStructFieldFloat64
+	case OpFloat64Ptr:
+		if isString {
+			return OpStructFieldFloat64PtrString
+		}
+		return OpStructFieldFloat64Ptr
+	case OpString:
+		if isString {
+			return OpStructFieldStringString
+		}
+		return OpStructFieldString
+	case OpStringPtr:
+		if isString {
+			return OpStructFieldStringPtrString
+		}
+		return OpStructFieldStringPtr
+	case OpNumber:
+		if isString {
+			return OpStructFieldNumberString
+		}
+		return OpStructFieldNumber
+	case OpNumberPtr:
+		if isString {
+			return OpStructFieldNumberPtrString
+		}
+		return OpStructFieldNumberPtr
+	case OpBool:
+		if isString {
+			return OpStructFieldBoolString
+		}
+		return OpStructFieldBool
+	case OpBoolPtr:
+		if isString {
+			return OpStructFieldBoolPtrString
+		}
+		return OpStructFieldBoolPtr
+	case OpBytes: // from here on isString is ignored
+		return OpStructFieldBytes
+	case OpBytesPtr:
+		return OpStructFieldBytesPtr
+	case OpMap:
+		return OpStructFieldMap
+	case OpMapPtr:
+		c.Op = OpMap // side effect: the receiver's own op is rewritten to the non-pointer form
+		return OpStructFieldMapPtr
+	case OpArray:
+		return OpStructFieldArray
+	case OpArrayPtr:
+		c.Op = OpArray // side effect: the receiver's own op is rewritten to the non-pointer form
+		return OpStructFieldArrayPtr
+	case OpSlice:
+		return OpStructFieldSlice
+	case OpSlicePtr:
+		c.Op = OpSlice // side effect: the receiver's own op is rewritten to the non-pointer form
+		return OpStructFieldSlicePtr
+	case OpMarshalJSON:
+		return OpStructFieldMarshalJSON
+	case OpMarshalJSONPtr:
+		return OpStructFieldMarshalJSONPtr
+	case OpMarshalText:
+		return OpStructFieldMarshalText
+	case OpMarshalTextPtr:
+		return OpStructFieldMarshalTextPtr
+	}
+	return OpStructField // every op not listed above gets the generic field op
+}
+
+func newOpCode(ctx *compileContext, typ *runtime.Type, op OpType) *Opcode { // builds an opcode whose Next is a freshly created OpEnd opcode
+	return newOpCodeWithNext(ctx, typ, op, newEndOp(ctx, typ))
+}
+
+func opcodeOffset(idx int) uint32 { // converts a slot index into a byte offset (idx * uintptrSize)
+	return uint32(idx) * uintptrSize
+}
+
+func getCodeAddrByIdx(head *Opcode, idx uint32) *Opcode { // returns the address idx*sizeof(Opcode) bytes past head, i.e. element idx of an Opcode array starting at head
+	addr := uintptr(unsafe.Pointer(head)) + uintptr(idx)*unsafe.Sizeof(Opcode{})
+	return *(**Opcode)(unsafe.Pointer(&addr)) // reinterpret the computed uintptr as *Opcode
+}
+
+func copyOpcode(code *Opcode) *Opcode { // copies the IterNext chain starting at code into one contiguous []Opcode and returns its first element
+	codeNum := ToEndCode(code).DisplayIdx + 1 // the slice is sized from the terminating opcode's DisplayIdx
+	codeSlice := make([]Opcode, codeNum)
+	head := (*Opcode)((*runtime.SliceHeader)(unsafe.Pointer(&codeSlice)).Data)
+	ptr := head
+	c := code
+	for {
+		*ptr = Opcode{ // scalar fields, Type, FieldQuery and Jmp are copied as-is; Next/End/NextField are set below
+			Op:         c.Op,
+			Key:        c.Key,
+			PtrNum:     c.PtrNum,
+			NumBitSize: c.NumBitSize,
+			Flags:      c.Flags,
+			Idx:        c.Idx,
+			Offset:     c.Offset,
+			Type:       c.Type,
+			FieldQuery: c.FieldQuery,
+			DisplayIdx: c.DisplayIdx,
+			DisplayKey: c.DisplayKey,
+			ElemIdx:    c.ElemIdx,
+			Length:     c.Length,
+			Size:       c.Size,
+			Indent:     c.Indent,
+			Jmp:        c.Jmp,
+		}
+		if c.End != nil { // links are re-pointed into the new slice, at the slot given by the target's DisplayIdx
+			ptr.End = getCodeAddrByIdx(head, c.End.DisplayIdx)
+		}
+		if c.NextField != nil {
+			ptr.NextField = getCodeAddrByIdx(head, c.NextField.DisplayIdx)
+		}
+		if c.Next != nil {
+			ptr.Next = getCodeAddrByIdx(head, c.Next.DisplayIdx)
+		}
+		if c.IsEnd() { // the terminating opcode is copied too, then the walk stops
+			break
+		}
+		ptr = getCodeAddrByIdx(head, c.DisplayIdx+1) // the next copy goes into the slot after the current DisplayIdx
+		c = c.IterNext()
+	}
+	return head
+}
+
+func setTotalLengthToInterfaceOp(code *Opcode) { // stores code.TotalLength() in Length of every OpInterface/OpInterfacePtr on the IterNext chain
+	for c := code; !c.IsEnd(); {
+		if c.Op == OpInterface || c.Op == OpInterfacePtr {
+			c.Length = uint32(code.TotalLength()) // recomputed from the chain head for each match
+		}
+		c = c.IterNext()
+	}
+}
+
+func ToEndCode(code *Opcode) *Opcode { // follows IterNext from code and returns the first opcode for which IsEnd is true (nil included)
+	c := code
+	for !c.IsEnd() {
+		c = c.IterNext()
+	}
+	return c
+}
+
+func copyToInterfaceOpcode(code *Opcode) *Opcode { // copies the chain and turns the copy's terminating opcode into OpInterfaceEnd
+	copied := copyOpcode(code)
+	c := copied
+	c = ToEndCode(c)
+	c.Idx += uintptrSize // shift the end opcode one slot further
+	c.ElemIdx = c.Idx + uintptrSize // ElemIdx and Length take the two slots after the shifted Idx
+	c.Length = c.Idx + 2*uintptrSize
+	c.Op = OpInterfaceEnd
+	return copied // the head of the copy; the original chain is left untouched
+}
+
+func newOpCodeWithNext(ctx *compileContext, typ *runtime.Type, op OpType, next *Opcode) *Opcode { // builds an opcode from ctx's current ptrIndex, opcodeIndex and indent, linked to next
+	return &Opcode{
+		Op:         op,
+		Idx:        opcodeOffset(ctx.ptrIndex),
+		Next:       next,
+		Type:       typ,
+		DisplayIdx: ctx.opcodeIndex,
+		Indent:     ctx.indent,
+	}
+}
+
+func newEndOp(ctx *compileContext, typ *runtime.Type) *Opcode { // builds an OpEnd opcode with no successor
+	return newOpCodeWithNext(ctx, typ, OpEnd, nil)
+}
+
+func (c *Opcode) TotalLength() int { // returns 1 + the highest slot index (MaxIdx / uintptrSize) used by any opcode on the chain
+	var idx int
+	code := c
+	for !code.IsEnd() {
+		maxIdx := int(code.MaxIdx() / uintptrSize)
+		if idx < maxIdx {
+			idx = maxIdx
+		}
+		if code.Op == OpRecursiveEnd { // never true here: IsEnd already stops the loop on OpRecursiveEnd
+			break
+		}
+		code = code.IterNext()
+	}
+	maxIdx := int(code.MaxIdx() / uintptrSize) // the terminating opcode is counted as well
+	if idx < maxIdx {
+		idx = maxIdx
+	}
+	return idx + 1
+}
+
+func (c *Opcode) dumpHead(code *Opcode) string { // formats code as one dump line with idx, elemIdx and length; the receiver c is not used
+	var length uint32
+	if code.Op.CodeType() == CodeArrayHead { // array heads print Length as stored; everything else prints it divided by uintptrSize
+		length = code.Length
+	} else {
+		length = code.Length / uintptrSize
+	}
+	return fmt.Sprintf(
+		`[%03d]%s%s ([idx:%d][elemIdx:%d][length:%d])`,
+		code.DisplayIdx,
+		strings.Repeat("-", int(code.Indent)),
+		code.Op,
+		code.Idx/uintptrSize,
+		code.ElemIdx/uintptrSize,
+		length,
+	)
+}
+
+func (c *Opcode) dumpMapHead(code *Opcode) string { // formats code as display index, one dash per indent level, op and slot index; the receiver c is not used
+	return fmt.Sprintf(
+		`[%03d]%s%s ([idx:%d])`,
+		code.DisplayIdx,
+		strings.Repeat("-", int(code.Indent)),
+		code.Op,
+		code.Idx/uintptrSize,
+	)
+}
+
+func (c *Opcode) dumpMapEnd(code *Opcode) string { // formats code as display index, one dash per indent level, op and slot index; the receiver c is not used
+	return fmt.Sprintf(
+		`[%03d]%s%s ([idx:%d])`,
+		code.DisplayIdx,
+		strings.Repeat("-", int(code.Indent)),
+		code.Op,
+		code.Idx/uintptrSize,
+	)
+}
+
+func (c *Opcode) dumpElem(code *Opcode) string { // formats code as one dump line with idx, elemIdx, length and size; the receiver c is not used
+	var length uint32
+	if code.Op.CodeType() == CodeArrayElem { // array elems print Length as stored; everything else prints it divided by uintptrSize
+		length = code.Length
+	} else {
+		length = code.Length / uintptrSize
+	}
+	return fmt.Sprintf(
+		`[%03d]%s%s ([idx:%d][elemIdx:%d][length:%d][size:%d])`,
+		code.DisplayIdx,
+		strings.Repeat("-", int(code.Indent)),
+		code.Op,
+		code.Idx/uintptrSize,
+		code.ElemIdx/uintptrSize,
+		length,
+		code.Size,
+	)
+}
+
+func (c *Opcode) dumpField(code *Opcode) string { // formats code as one dump line with idx, DisplayKey and Offset; the receiver c is not used
+	return fmt.Sprintf(
+		`[%03d]%s%s ([idx:%d][key:%s][offset:%d])`,
+		code.DisplayIdx,
+		strings.Repeat("-", int(code.Indent)),
+		code.Op,
+		code.Idx/uintptrSize,
+		code.DisplayKey,
+		code.Offset,
+	)
+}
+
+func (c *Opcode) dumpKey(code *Opcode) string { // formats code as display index, one dash per indent level, op and slot index; the receiver c is not used
+	return fmt.Sprintf(
+		`[%03d]%s%s ([idx:%d])`,
+		code.DisplayIdx,
+		strings.Repeat("-", int(code.Indent)),
+		code.Op,
+		code.Idx/uintptrSize,
+	)
+}
+
+func (c *Opcode) dumpValue(code *Opcode) string { // formats code as display index, one dash per indent level, op and slot index; the receiver c is not used
+	return fmt.Sprintf(
+		`[%03d]%s%s ([idx:%d])`,
+		code.DisplayIdx,
+		strings.Repeat("-", int(code.Indent)),
+		code.Op,
+		code.Idx/uintptrSize,
+	)
+}
+
+func (c *Opcode) Dump() string { // returns one text line per opcode on the chain, joined with "\n"; the terminating opcode is not listed
+	codes := []string{}
+	for code := c; !code.IsEnd(); {
+		switch code.Op.CodeType() { // each case picks the formatter and the link (Next or End) to follow
+		case CodeSliceHead:
+			codes = append(codes, c.dumpHead(code))
+			code = code.Next
+		case CodeMapHead:
+			codes = append(codes, c.dumpMapHead(code))
+			code = code.Next
+		case CodeArrayElem, CodeSliceElem:
+			codes = append(codes, c.dumpElem(code))
+			code = code.End
+		case CodeMapKey:
+			codes = append(codes, c.dumpKey(code))
+			code = code.End
+		case CodeMapValue:
+			codes = append(codes, c.dumpValue(code))
+			code = code.Next
+		case CodeMapEnd:
+			codes = append(codes, c.dumpMapEnd(code))
+			code = code.Next
+		case CodeStructField:
+			codes = append(codes, c.dumpField(code))
+			code = code.Next
+		case CodeStructEnd:
+			codes = append(codes, c.dumpField(code))
+			code = code.Next
+		default: // all remaining code types print only the slot index
+			codes = append(codes, fmt.Sprintf(
+				"[%03d]%s%s ([idx:%d])",
+				code.DisplayIdx,
+				strings.Repeat("-", int(code.Indent)),
+				code.Op,
+				code.Idx/uintptrSize,
+			))
+			code = code.Next
+		}
+	}
+	return strings.Join(codes, "\n")
+}
+
+func (c *Opcode) DumpDOT() string { // renders the opcode chain as a DOT digraph: one node per opcode, one edge per non-nil Next/NextField/End/Jmp link
+	type edge struct {
+		from, to *Opcode
+		label    string
+		weight   int
+	}
+	var edges []edge
+
+	b := &bytes.Buffer{}
+	fmt.Fprintf(b, "digraph \"%p\" {\n", c.Type) // the graph is named after the address of the head's Type
+	fmt.Fprintln(b, "mclimit=1.5;\nrankdir=TD;\nordering=out;\nnode[shape=box];")
+	for code := c; !code.IsEnd(); {
+		label := code.Op.String()
+		fmt.Fprintf(b, "\"%p\" [label=%q];\n", code, label) // nodes are identified by opcode address
+		if p := code.Next; p != nil {
+			edges = append(edges, edge{
+				from:   code,
+				to:     p,
+				label:  "Next",
+				weight: 10,
+			})
+		}
+		if p := code.NextField; p != nil {
+			edges = append(edges, edge{
+				from:   code,
+				to:     p,
+				label:  "NextField",
+				weight: 2,
+			})
+		}
+		if p := code.End; p != nil {
+			edges = append(edges, edge{
+				from:   code,
+				to:     p,
+				label:  "End",
+				weight: 1,
+			})
+		}
+		if p := code.Jmp; p != nil {
+			edges = append(edges, edge{
+				from:   code,
+				to:     p.Code, // the edge targets the opcode held by the CompiledCode
+				label:  "Jmp",
+				weight: 1,
+			})
+		}
+
+		switch code.Op.CodeType() { // advance along the same link Dump follows for each code type
+		case CodeSliceHead:
+			code = code.Next
+		case CodeMapHead:
+			code = code.Next
+		case CodeArrayElem, CodeSliceElem:
+			code = code.End
+		case CodeMapKey:
+			code = code.End
+		case CodeMapValue:
+			code = code.Next
+		case CodeMapEnd:
+			code = code.Next
+		case CodeStructField:
+			code = code.Next
+		case CodeStructEnd:
+			code = code.Next
+		default:
+			code = code.Next
+		}
+		if code.IsEnd() { // NOTE(review): IsEnd is also true for nil, in which case code.Op below would dereference nil — confirm chains always end in an End opcode
+			fmt.Fprintf(b, "\"%p\" [label=%q];\n", code, code.Op.String())
+		}
+	}
+	sort.Slice(edges, func(i, j int) bool { // edges are written ordered by the target's DisplayIdx
+		return edges[i].to.DisplayIdx < edges[j].to.DisplayIdx
+	})
+	for _, e := range edges {
+		fmt.Fprintf(b, "\"%p\" -> \"%p\" [label=%q][weight=%d];\n", e.from, e.to, e.label, e.weight)
+	}
+	fmt.Fprint(b, "}")
+	return b.String()
+}
+
+func newSliceHeaderCode(ctx *compileContext, typ *runtime.Type) *Opcode { // builds the OpSlice head opcode with separate offsets for Idx, ElemIdx and Length
+	idx := opcodeOffset(ctx.ptrIndex)
+	ctx.incPtrIndex() // presumably advances ctx.ptrIndex so the next offset differs — TODO confirm in compileContext
+	elemIdx := opcodeOffset(ctx.ptrIndex)
+	ctx.incPtrIndex()
+	length := opcodeOffset(ctx.ptrIndex)
+	return &Opcode{
+		Op:         OpSlice,
+		Type:       typ,
+		Idx:        idx,
+		DisplayIdx: ctx.opcodeIndex,
+		ElemIdx:    elemIdx,
+		Length:     length,
+		Indent:     ctx.indent,
+	}
+}
+
+func newSliceElemCode(ctx *compileContext, typ *runtime.Type, head *Opcode, size uintptr) *Opcode { // builds the OpSliceElem opcode, reusing head's Idx, ElemIdx and Length
+	return &Opcode{
+		Op:         OpSliceElem,
+		Type:       typ,
+		Idx:        head.Idx,
+		DisplayIdx: ctx.opcodeIndex,
+		ElemIdx:    head.ElemIdx,
+		Length:     head.Length,
+		Indent:     ctx.indent,
+		Size:       uint32(size), // size is truncated to 32 bits
+	}
+}
+
+func newArrayHeaderCode(ctx *compileContext, typ *runtime.Type, alen int) *Opcode { // builds the OpArray head opcode with offsets for Idx and ElemIdx
+	idx := opcodeOffset(ctx.ptrIndex)
+	ctx.incPtrIndex() // presumably advances ctx.ptrIndex so elemIdx differs from idx — TODO confirm in compileContext
+	elemIdx := opcodeOffset(ctx.ptrIndex)
+	return &Opcode{
+		Op:         OpArray,
+		Type:       typ,
+		Idx:        idx,
+		DisplayIdx: ctx.opcodeIndex,
+		ElemIdx:    elemIdx,
+		Indent:     ctx.indent,
+		Length:     uint32(alen), // unlike the slice head, Length holds alen itself rather than an offset
+	}
+}
+
+func newArrayElemCode(ctx *compileContext, typ *runtime.Type, head *Opcode, length int, size uintptr) *Opcode { // builds the OpArrayElem opcode, reusing head's Idx and ElemIdx
+	return &Opcode{
+		Op:         OpArrayElem,
+		Type:       typ,
+		Idx:        head.Idx,
+		DisplayIdx: ctx.opcodeIndex,
+		ElemIdx:    head.ElemIdx,
+		Length:     uint32(length), // taken from the length argument, not from head.Length
+		Indent:     ctx.indent,
+		Size:       uint32(size),
+	}
+}
+
+func newMapHeaderCode(ctx *compileContext, typ *runtime.Type) *Opcode { // builds the OpMap head opcode at ctx's current offset
+	idx := opcodeOffset(ctx.ptrIndex)
+	ctx.incPtrIndex() // called after idx is captured, so it does not affect this opcode's Idx
+	return &Opcode{
+		Op:         OpMap,
+		Type:       typ,
+		Idx:        idx,
+		DisplayIdx: ctx.opcodeIndex,
+		Indent:     ctx.indent,
+	}
+}
+
+func newMapKeyCode(ctx *compileContext, typ *runtime.Type, head *Opcode) *Opcode { // builds the OpMapKey opcode sharing head's Idx
+	return &Opcode{
+		Op:         OpMapKey,
+		Type:       typ,
+		Idx:        head.Idx,
+		DisplayIdx: ctx.opcodeIndex,
+		Indent:     ctx.indent,
+	}
+}
+
+func newMapValueCode(ctx *compileContext, typ *runtime.Type, head *Opcode) *Opcode { // builds the OpMapValue opcode sharing head's Idx
+	return &Opcode{
+		Op:         OpMapValue,
+		Type:       typ,
+		Idx:        head.Idx,
+		DisplayIdx: ctx.opcodeIndex,
+		Indent:     ctx.indent,
+	}
+}
+
+func newMapEndCode(ctx *compileContext, typ *runtime.Type, head *Opcode) *Opcode { // builds the OpMapEnd opcode sharing head's Idx
+	return &Opcode{
+		Op:         OpMapEnd,
+		Type:       typ,
+		Idx:        head.Idx,
+		DisplayIdx: ctx.opcodeIndex,
+		Indent:     ctx.indent,
+		Next:       newEndOp(ctx, typ), // followed by a fresh OpEnd opcode
+	}
+}
+
+func newRecursiveCode(ctx *compileContext, typ *runtime.Type, jmp *CompiledCode) *Opcode { // builds an OpRecursive opcode whose Jmp is the given compiled code
+	return &Opcode{
+		Op:         OpRecursive,
+		Type:       typ,
+		Idx:        opcodeOffset(ctx.ptrIndex),
+		Next:       newEndOp(ctx, typ), // followed by a fresh OpEnd opcode
+		DisplayIdx: ctx.opcodeIndex,
+		Indent:     ctx.indent,
+		Jmp:        jmp,
+	}
+}
diff --git a/vendor/github.com/goccy/go-json/internal/encoder/option.go b/vendor/github.com/goccy/go-json/internal/encoder/option.go
new file mode 100644
index 00000000..12c58e46
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/internal/encoder/option.go
@@ -0,0 +1,48 @@
+package encoder
+
+import (
+	"context"
+	"io"
+)
+
+type OptionFlag uint8 // bit set; the eight constants below occupy bits 0 through 7
+
+const (
+	HTMLEscapeOption OptionFlag = 1 << iota // 1 << 0; each following name takes the next higher bit
+	IndentOption
+	UnorderedMapOption
+	DebugOption
+	ColorizeOption
+	ContextOption
+	NormalizeUTF8Option
+	FieldQueryOption // 1 << 7, the last bit available in a uint8
+)
+
+type Option struct {
+	Flag        OptionFlag // bit set of the *Option constants
+	ColorScheme *ColorScheme
+	Context     context.Context
+	DebugOut    io.Writer
+	DebugDOTOut io.WriteCloser
+}
+
+type EncodeFormat struct {
+	Header string // NOTE(review): presumably text written before a value — confirm against the encoder
+	Footer string // NOTE(review): presumably text written after a value — confirm against the encoder
+}
+
+type EncodeFormatScheme struct { // holds one EncodeFormat for each value category named by the fields
+	Int       EncodeFormat
+	Uint      EncodeFormat
+	Float     EncodeFormat
+	Bool      EncodeFormat
+	String    EncodeFormat
+	Binary    EncodeFormat
+	ObjectKey EncodeFormat
+	Null      EncodeFormat
+}
+
+type (
+	ColorScheme = EncodeFormatScheme // type alias: identical to EncodeFormatScheme, not a distinct type
+	ColorFormat = EncodeFormat       // type alias: identical to EncodeFormat, not a distinct type
+)
diff --git a/vendor/github.com/goccy/go-json/internal/encoder/optype.go b/vendor/github.com/goccy/go-json/internal/encoder/optype.go
new file mode 100644
index 00000000..5c1241b4
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/internal/encoder/optype.go
@@ -0,0 +1,932 @@
+// Code generated by internal/cmd/generator. DO NOT EDIT!
+package encoder
+
+import (
+	"strings"
+)
+
+type CodeType int
+
+const (
+	CodeOp          CodeType = 0
+	CodeArrayHead   CodeType = 1
+	CodeArrayElem   CodeType = 2
+	CodeSliceHead   CodeType = 3
+	CodeSliceElem   CodeType = 4
+	CodeMapHead     CodeType = 5
+	CodeMapKey      CodeType = 6
+	CodeMapValue    CodeType = 7
+	CodeMapEnd      CodeType = 8
+	CodeRecursive   CodeType = 9
+	CodeStructField CodeType = 10
+	CodeStructEnd   CodeType = 11
+)
+
+var opTypeStrings = [400]string{
+	"End",
+	"Interface",
+	"Ptr",
+	"SliceElem",
+	"SliceEnd",
+	"ArrayElem",
+	"ArrayEnd",
+	"MapKey",
+	"MapValue",
+	"MapEnd",
+	"Recursive",
+	"RecursivePtr",
+	"RecursiveEnd",
+	"InterfaceEnd",
+	"Int",
+	"Uint",
+	"Float32",
+	"Float64",
+	"Bool",
+	"String",
+	"Bytes",
+	"Number",
+	"Array",
+	"Map",
+	"Slice",
+	"Struct",
+	"MarshalJSON",
+	"MarshalText",
+	"IntString",
+	"UintString",
+	"Float32String",
+	"Float64String",
+	"BoolString",
+	"StringString",
+	"NumberString",
+	"IntPtr",
+	"UintPtr",
+	"Float32Ptr",
+	"Float64Ptr",
+	"BoolPtr",
+	"StringPtr",
+	"BytesPtr",
+	"NumberPtr",
+	"ArrayPtr",
+	"MapPtr",
+	"SlicePtr",
+	"MarshalJSONPtr",
+	"MarshalTextPtr",
+	"InterfacePtr",
+	"IntPtrString",
+	"UintPtrString",
+	"Float32PtrString",
+	"Float64PtrString",
+	"BoolPtrString",
+	"StringPtrString",
+	"NumberPtrString",
+	"StructHeadInt",
+	"StructHeadOmitEmptyInt",
+	"StructPtrHeadInt",
+	"StructPtrHeadOmitEmptyInt",
+	"StructHeadUint",
+	"StructHeadOmitEmptyUint",
+	"StructPtrHeadUint",
+	"StructPtrHeadOmitEmptyUint",
+	"StructHeadFloat32",
+	"StructHeadOmitEmptyFloat32",
+	"StructPtrHeadFloat32",
+	"StructPtrHeadOmitEmptyFloat32",
+	"StructHeadFloat64",
+	"StructHeadOmitEmptyFloat64",
+	"StructPtrHeadFloat64",
+	"StructPtrHeadOmitEmptyFloat64",
+	"StructHeadBool",
+	"StructHeadOmitEmptyBool",
+	"StructPtrHeadBool",
+	"StructPtrHeadOmitEmptyBool",
+	"StructHeadString",
+	"StructHeadOmitEmptyString",
+	"StructPtrHeadString",
+	"StructPtrHeadOmitEmptyString",
+	"StructHeadBytes",
+	"StructHeadOmitEmptyBytes",
+	"StructPtrHeadBytes",
+	"StructPtrHeadOmitEmptyBytes",
+	"StructHeadNumber",
+	"StructHeadOmitEmptyNumber",
+	"StructPtrHeadNumber",
+	"StructPtrHeadOmitEmptyNumber",
+	"StructHeadArray",
+	"StructHeadOmitEmptyArray",
+	"StructPtrHeadArray",
+	"StructPtrHeadOmitEmptyArray",
+	"StructHeadMap",
+	"StructHeadOmitEmptyMap",
+	"StructPtrHeadMap",
+	"StructPtrHeadOmitEmptyMap",
+	"StructHeadSlice",
+	"StructHeadOmitEmptySlice",
+	"StructPtrHeadSlice",
+	"StructPtrHeadOmitEmptySlice",
+	"StructHeadStruct",
+	"StructHeadOmitEmptyStruct",
+	"StructPtrHeadStruct",
+	"StructPtrHeadOmitEmptyStruct",
+	"StructHeadMarshalJSON",
+	"StructHeadOmitEmptyMarshalJSON",
+	"StructPtrHeadMarshalJSON",
+	"StructPtrHeadOmitEmptyMarshalJSON",
+	"StructHeadMarshalText",
+	"StructHeadOmitEmptyMarshalText",
+	"StructPtrHeadMarshalText",
+	"StructPtrHeadOmitEmptyMarshalText",
+	"StructHeadIntString",
+	"StructHeadOmitEmptyIntString",
+	"StructPtrHeadIntString",
+	"StructPtrHeadOmitEmptyIntString",
+	"StructHeadUintString",
+	"StructHeadOmitEmptyUintString",
+	"StructPtrHeadUintString",
+	"StructPtrHeadOmitEmptyUintString",
+	"StructHeadFloat32String",
+	"StructHeadOmitEmptyFloat32String",
+	"StructPtrHeadFloat32String",
+	"StructPtrHeadOmitEmptyFloat32String",
+	"StructHeadFloat64String",
+	"StructHeadOmitEmptyFloat64String",
+	"StructPtrHeadFloat64String",
+	"StructPtrHeadOmitEmptyFloat64String",
+	"StructHeadBoolString",
+	"StructHeadOmitEmptyBoolString",
+	"StructPtrHeadBoolString",
+	"StructPtrHeadOmitEmptyBoolString",
+	"StructHeadStringString",
+	"StructHeadOmitEmptyStringString",
+	"StructPtrHeadStringString",
+	"StructPtrHeadOmitEmptyStringString",
+	"StructHeadNumberString",
+	"StructHeadOmitEmptyNumberString",
+	"StructPtrHeadNumberString",
+	"StructPtrHeadOmitEmptyNumberString",
+	"StructHeadIntPtr",
+	"StructHeadOmitEmptyIntPtr",
+	"StructPtrHeadIntPtr",
+	"StructPtrHeadOmitEmptyIntPtr",
+	"StructHeadUintPtr",
+	"StructHeadOmitEmptyUintPtr",
+	"StructPtrHeadUintPtr",
+	"StructPtrHeadOmitEmptyUintPtr",
+	"StructHeadFloat32Ptr",
+	"StructHeadOmitEmptyFloat32Ptr",
+	"StructPtrHeadFloat32Ptr",
+	"StructPtrHeadOmitEmptyFloat32Ptr",
+	"StructHeadFloat64Ptr",
+	"StructHeadOmitEmptyFloat64Ptr",
+	"StructPtrHeadFloat64Ptr",
+	"StructPtrHeadOmitEmptyFloat64Ptr",
+	"StructHeadBoolPtr",
+	"StructHeadOmitEmptyBoolPtr",
+	"StructPtrHeadBoolPtr",
+	"StructPtrHeadOmitEmptyBoolPtr",
+	"StructHeadStringPtr",
+	"StructHeadOmitEmptyStringPtr",
+	"StructPtrHeadStringPtr",
+	"StructPtrHeadOmitEmptyStringPtr",
+	"StructHeadBytesPtr",
+	"StructHeadOmitEmptyBytesPtr",
+	"StructPtrHeadBytesPtr",
+	"StructPtrHeadOmitEmptyBytesPtr",
+	"StructHeadNumberPtr",
+	"StructHeadOmitEmptyNumberPtr",
+	"StructPtrHeadNumberPtr",
+	"StructPtrHeadOmitEmptyNumberPtr",
+	"StructHeadArrayPtr",
+	"StructHeadOmitEmptyArrayPtr",
+	"StructPtrHeadArrayPtr",
+	"StructPtrHeadOmitEmptyArrayPtr",
+	"StructHeadMapPtr",
+	"StructHeadOmitEmptyMapPtr",
+	"StructPtrHeadMapPtr",
+	"StructPtrHeadOmitEmptyMapPtr",
+	"StructHeadSlicePtr",
+	"StructHeadOmitEmptySlicePtr",
+	"StructPtrHeadSlicePtr",
+	"StructPtrHeadOmitEmptySlicePtr",
+	"StructHeadMarshalJSONPtr",
+	"StructHeadOmitEmptyMarshalJSONPtr",
+	"StructPtrHeadMarshalJSONPtr",
+	"StructPtrHeadOmitEmptyMarshalJSONPtr",
+	"StructHeadMarshalTextPtr",
+	"StructHeadOmitEmptyMarshalTextPtr",
+	"StructPtrHeadMarshalTextPtr",
+	"StructPtrHeadOmitEmptyMarshalTextPtr",
+	"StructHeadInterfacePtr",
+	"StructHeadOmitEmptyInterfacePtr",
+	"StructPtrHeadInterfacePtr",
+	"StructPtrHeadOmitEmptyInterfacePtr",
+	"StructHeadIntPtrString",
+	"StructHeadOmitEmptyIntPtrString",
+	"StructPtrHeadIntPtrString",
+	"StructPtrHeadOmitEmptyIntPtrString",
+	"StructHeadUintPtrString",
+	"StructHeadOmitEmptyUintPtrString",
+	"StructPtrHeadUintPtrString",
+	"StructPtrHeadOmitEmptyUintPtrString",
+	"StructHeadFloat32PtrString",
+	"StructHeadOmitEmptyFloat32PtrString",
+	"StructPtrHeadFloat32PtrString",
+	"StructPtrHeadOmitEmptyFloat32PtrString",
+	"StructHeadFloat64PtrString",
+	"StructHeadOmitEmptyFloat64PtrString",
+	"StructPtrHeadFloat64PtrString",
+	"StructPtrHeadOmitEmptyFloat64PtrString",
+	"StructHeadBoolPtrString",
+	"StructHeadOmitEmptyBoolPtrString",
+	"StructPtrHeadBoolPtrString",
+	"StructPtrHeadOmitEmptyBoolPtrString",
+	"StructHeadStringPtrString",
+	"StructHeadOmitEmptyStringPtrString",
+	"StructPtrHeadStringPtrString",
+	"StructPtrHeadOmitEmptyStringPtrString",
+	"StructHeadNumberPtrString",
+	"StructHeadOmitEmptyNumberPtrString",
+	"StructPtrHeadNumberPtrString",
+	"StructPtrHeadOmitEmptyNumberPtrString",
+	"StructHead",
+	"StructHeadOmitEmpty",
+	"StructPtrHead",
+	"StructPtrHeadOmitEmpty",
+	"StructFieldInt",
+	"StructFieldOmitEmptyInt",
+	"StructEndInt",
+	"StructEndOmitEmptyInt",
+	"StructFieldUint",
+	"StructFieldOmitEmptyUint",
+	"StructEndUint",
+	"StructEndOmitEmptyUint",
+	"StructFieldFloat32",
+	"StructFieldOmitEmptyFloat32",
+	"StructEndFloat32",
+	"StructEndOmitEmptyFloat32",
+	"StructFieldFloat64",
+	"StructFieldOmitEmptyFloat64",
+	"StructEndFloat64",
+	"StructEndOmitEmptyFloat64",
+	"StructFieldBool",
+	"StructFieldOmitEmptyBool",
+	"StructEndBool",
+	"StructEndOmitEmptyBool",
+	"StructFieldString",
+	"StructFieldOmitEmptyString",
+	"StructEndString",
+	"StructEndOmitEmptyString",
+	"StructFieldBytes",
+	"StructFieldOmitEmptyBytes",
+	"StructEndBytes",
+	"StructEndOmitEmptyBytes",
+	"StructFieldNumber",
+	"StructFieldOmitEmptyNumber",
+	"StructEndNumber",
+	"StructEndOmitEmptyNumber",
+	"StructFieldArray",
+	"StructFieldOmitEmptyArray",
+	"StructEndArray",
+	"StructEndOmitEmptyArray",
+	"StructFieldMap",
+	"StructFieldOmitEmptyMap",
+	"StructEndMap",
+	"StructEndOmitEmptyMap",
+	"StructFieldSlice",
+	"StructFieldOmitEmptySlice",
+	"StructEndSlice",
+	"StructEndOmitEmptySlice",
+	"StructFieldStruct",
+	"StructFieldOmitEmptyStruct",
+	"StructEndStruct",
+	"StructEndOmitEmptyStruct",
+	"StructFieldMarshalJSON",
+	"StructFieldOmitEmptyMarshalJSON",
+	"StructEndMarshalJSON",
+	"StructEndOmitEmptyMarshalJSON",
+	"StructFieldMarshalText",
+	"StructFieldOmitEmptyMarshalText",
+	"StructEndMarshalText",
+	"StructEndOmitEmptyMarshalText",
+	"StructFieldIntString",
+	"StructFieldOmitEmptyIntString",
+	"StructEndIntString",
+	"StructEndOmitEmptyIntString",
+	"StructFieldUintString",
+	"StructFieldOmitEmptyUintString",
+	"StructEndUintString",
+	"StructEndOmitEmptyUintString",
+	"StructFieldFloat32String",
+	"StructFieldOmitEmptyFloat32String",
+	"StructEndFloat32String",
+	"StructEndOmitEmptyFloat32String",
+	"StructFieldFloat64String",
+	"StructFieldOmitEmptyFloat64String",
+	"StructEndFloat64String",
+	"StructEndOmitEmptyFloat64String",
+	"StructFieldBoolString",
+	"StructFieldOmitEmptyBoolString",
+	"StructEndBoolString",
+	"StructEndOmitEmptyBoolString",
+	"StructFieldStringString",
+	"StructFieldOmitEmptyStringString",
+	"StructEndStringString",
+	"StructEndOmitEmptyStringString",
+	"StructFieldNumberString",
+	"StructFieldOmitEmptyNumberString",
+	"StructEndNumberString",
+	"StructEndOmitEmptyNumberString",
+	"StructFieldIntPtr",
+	"StructFieldOmitEmptyIntPtr",
+	"StructEndIntPtr",
+	"StructEndOmitEmptyIntPtr",
+	"StructFieldUintPtr",
+	"StructFieldOmitEmptyUintPtr",
+	"StructEndUintPtr",
+	"StructEndOmitEmptyUintPtr",
+	"StructFieldFloat32Ptr",
+	"StructFieldOmitEmptyFloat32Ptr",
+	"StructEndFloat32Ptr",
+	"StructEndOmitEmptyFloat32Ptr",
+	"StructFieldFloat64Ptr",
+	"StructFieldOmitEmptyFloat64Ptr",
+	"StructEndFloat64Ptr",
+	"StructEndOmitEmptyFloat64Ptr",
+	"StructFieldBoolPtr",
+	"StructFieldOmitEmptyBoolPtr",
+	"StructEndBoolPtr",
+	"StructEndOmitEmptyBoolPtr",
+	"StructFieldStringPtr",
+	"StructFieldOmitEmptyStringPtr",
+	"StructEndStringPtr",
+	"StructEndOmitEmptyStringPtr",
+	"StructFieldBytesPtr",
+	"StructFieldOmitEmptyBytesPtr",
+	"StructEndBytesPtr",
+	"StructEndOmitEmptyBytesPtr",
+	"StructFieldNumberPtr",
+	"StructFieldOmitEmptyNumberPtr",
+	"StructEndNumberPtr",
+	"StructEndOmitEmptyNumberPtr",
+	"StructFieldArrayPtr",
+	"StructFieldOmitEmptyArrayPtr",
+	"StructEndArrayPtr",
+	"StructEndOmitEmptyArrayPtr",
+	"StructFieldMapPtr",
+	"StructFieldOmitEmptyMapPtr",
+	"StructEndMapPtr",
+	"StructEndOmitEmptyMapPtr",
+	"StructFieldSlicePtr",
+	"StructFieldOmitEmptySlicePtr",
+	"StructEndSlicePtr",
+	"StructEndOmitEmptySlicePtr",
+	"StructFieldMarshalJSONPtr",
+	"StructFieldOmitEmptyMarshalJSONPtr",
+	"StructEndMarshalJSONPtr",
+	"StructEndOmitEmptyMarshalJSONPtr",
+	"StructFieldMarshalTextPtr",
+	"StructFieldOmitEmptyMarshalTextPtr",
+	"StructEndMarshalTextPtr",
+	"StructEndOmitEmptyMarshalTextPtr",
+	"StructFieldInterfacePtr",
+	"StructFieldOmitEmptyInterfacePtr",
+	"StructEndInterfacePtr",
+	"StructEndOmitEmptyInterfacePtr",
+	"StructFieldIntPtrString",
+	"StructFieldOmitEmptyIntPtrString",
+	"StructEndIntPtrString",
+	"StructEndOmitEmptyIntPtrString",
+	"StructFieldUintPtrString",
+	"StructFieldOmitEmptyUintPtrString",
+	"StructEndUintPtrString",
+	"StructEndOmitEmptyUintPtrString",
+	"StructFieldFloat32PtrString",
+	"StructFieldOmitEmptyFloat32PtrString",
+	"StructEndFloat32PtrString",
+	"StructEndOmitEmptyFloat32PtrString",
+	"StructFieldFloat64PtrString",
+	"StructFieldOmitEmptyFloat64PtrString",
+	"StructEndFloat64PtrString",
+	"StructEndOmitEmptyFloat64PtrString",
+	"StructFieldBoolPtrString",
+	"StructFieldOmitEmptyBoolPtrString",
+	"StructEndBoolPtrString",
+	"StructEndOmitEmptyBoolPtrString",
+	"StructFieldStringPtrString",
+	"StructFieldOmitEmptyStringPtrString",
+	"StructEndStringPtrString",
+	"StructEndOmitEmptyStringPtrString",
+	"StructFieldNumberPtrString",
+	"StructFieldOmitEmptyNumberPtrString",
+	"StructEndNumberPtrString",
+	"StructEndOmitEmptyNumberPtrString",
+	"StructField",
+	"StructFieldOmitEmpty",
+	"StructEnd",
+	"StructEndOmitEmpty",
+}
+
+type OpType uint16 // OpType is a 16-bit opcode number; String uses it as an index into opTypeStrings.
+
+const (
+	OpEnd                                    OpType = 0
+	OpInterface                              OpType = 1
+	OpPtr                                    OpType = 2
+	OpSliceElem                              OpType = 3
+	OpSliceEnd                               OpType = 4
+	OpArrayElem                              OpType = 5
+	OpArrayEnd                               OpType = 6
+	OpMapKey                                 OpType = 7
+	OpMapValue                               OpType = 8
+	OpMapEnd                                 OpType = 9
+	OpRecursive                              OpType = 10
+	OpRecursivePtr                           OpType = 11
+	OpRecursiveEnd                           OpType = 12
+	OpInterfaceEnd                           OpType = 13
+	OpInt                                    OpType = 14
+	OpUint                                   OpType = 15
+	OpFloat32                                OpType = 16
+	OpFloat64                                OpType = 17
+	OpBool                                   OpType = 18
+	OpString                                 OpType = 19
+	OpBytes                                  OpType = 20
+	OpNumber                                 OpType = 21
+	OpArray                                  OpType = 22
+	OpMap                                    OpType = 23
+	OpSlice                                  OpType = 24
+	OpStruct                                 OpType = 25
+	OpMarshalJSON                            OpType = 26
+	OpMarshalText                            OpType = 27
+	OpIntString                              OpType = 28
+	OpUintString                             OpType = 29
+	OpFloat32String                          OpType = 30
+	OpFloat64String                          OpType = 31
+	OpBoolString                             OpType = 32
+	OpStringString                           OpType = 33
+	OpNumberString                           OpType = 34
+	OpIntPtr                                 OpType = 35
+	OpUintPtr                                OpType = 36
+	OpFloat32Ptr                             OpType = 37
+	OpFloat64Ptr                             OpType = 38
+	OpBoolPtr                                OpType = 39
+	OpStringPtr                              OpType = 40
+	OpBytesPtr                               OpType = 41
+	OpNumberPtr                              OpType = 42
+	OpArrayPtr                               OpType = 43
+	OpMapPtr                                 OpType = 44
+	OpSlicePtr                               OpType = 45
+	OpMarshalJSONPtr                         OpType = 46
+	OpMarshalTextPtr                         OpType = 47
+	OpInterfacePtr                           OpType = 48
+	OpIntPtrString                           OpType = 49
+	OpUintPtrString                          OpType = 50
+	OpFloat32PtrString                       OpType = 51
+	OpFloat64PtrString                       OpType = 52
+	OpBoolPtrString                          OpType = 53
+	OpStringPtrString                        OpType = 54
+	OpNumberPtrString                        OpType = 55
+	OpStructHeadInt                          OpType = 56
+	OpStructHeadOmitEmptyInt                 OpType = 57
+	OpStructPtrHeadInt                       OpType = 58
+	OpStructPtrHeadOmitEmptyInt              OpType = 59
+	OpStructHeadUint                         OpType = 60
+	OpStructHeadOmitEmptyUint                OpType = 61
+	OpStructPtrHeadUint                      OpType = 62
+	OpStructPtrHeadOmitEmptyUint             OpType = 63
+	OpStructHeadFloat32                      OpType = 64
+	OpStructHeadOmitEmptyFloat32             OpType = 65
+	OpStructPtrHeadFloat32                   OpType = 66
+	OpStructPtrHeadOmitEmptyFloat32          OpType = 67
+	OpStructHeadFloat64                      OpType = 68
+	OpStructHeadOmitEmptyFloat64             OpType = 69
+	OpStructPtrHeadFloat64                   OpType = 70
+	OpStructPtrHeadOmitEmptyFloat64          OpType = 71
+	OpStructHeadBool                         OpType = 72
+	OpStructHeadOmitEmptyBool                OpType = 73
+	OpStructPtrHeadBool                      OpType = 74
+	OpStructPtrHeadOmitEmptyBool             OpType = 75
+	OpStructHeadString                       OpType = 76
+	OpStructHeadOmitEmptyString              OpType = 77
+	OpStructPtrHeadString                    OpType = 78
+	OpStructPtrHeadOmitEmptyString           OpType = 79
+	OpStructHeadBytes                        OpType = 80
+	OpStructHeadOmitEmptyBytes               OpType = 81
+	OpStructPtrHeadBytes                     OpType = 82
+	OpStructPtrHeadOmitEmptyBytes            OpType = 83
+	OpStructHeadNumber                       OpType = 84
+	OpStructHeadOmitEmptyNumber              OpType = 85
+	OpStructPtrHeadNumber                    OpType = 86
+	OpStructPtrHeadOmitEmptyNumber           OpType = 87
+	OpStructHeadArray                        OpType = 88
+	OpStructHeadOmitEmptyArray               OpType = 89
+	OpStructPtrHeadArray                     OpType = 90
+	OpStructPtrHeadOmitEmptyArray            OpType = 91
+	OpStructHeadMap                          OpType = 92
+	OpStructHeadOmitEmptyMap                 OpType = 93
+	OpStructPtrHeadMap                       OpType = 94
+	OpStructPtrHeadOmitEmptyMap              OpType = 95
+	OpStructHeadSlice                        OpType = 96
+	OpStructHeadOmitEmptySlice               OpType = 97
+	OpStructPtrHeadSlice                     OpType = 98
+	OpStructPtrHeadOmitEmptySlice            OpType = 99
+	OpStructHeadStruct                       OpType = 100
+	OpStructHeadOmitEmptyStruct              OpType = 101
+	OpStructPtrHeadStruct                    OpType = 102
+	OpStructPtrHeadOmitEmptyStruct           OpType = 103
+	OpStructHeadMarshalJSON                  OpType = 104
+	OpStructHeadOmitEmptyMarshalJSON         OpType = 105
+	OpStructPtrHeadMarshalJSON               OpType = 106
+	OpStructPtrHeadOmitEmptyMarshalJSON      OpType = 107
+	OpStructHeadMarshalText                  OpType = 108
+	OpStructHeadOmitEmptyMarshalText         OpType = 109
+	OpStructPtrHeadMarshalText               OpType = 110
+	OpStructPtrHeadOmitEmptyMarshalText      OpType = 111
+	OpStructHeadIntString                    OpType = 112
+	OpStructHeadOmitEmptyIntString           OpType = 113
+	OpStructPtrHeadIntString                 OpType = 114
+	OpStructPtrHeadOmitEmptyIntString        OpType = 115
+	OpStructHeadUintString                   OpType = 116
+	OpStructHeadOmitEmptyUintString          OpType = 117
+	OpStructPtrHeadUintString                OpType = 118
+	OpStructPtrHeadOmitEmptyUintString       OpType = 119
+	OpStructHeadFloat32String                OpType = 120
+	OpStructHeadOmitEmptyFloat32String       OpType = 121
+	OpStructPtrHeadFloat32String             OpType = 122
+	OpStructPtrHeadOmitEmptyFloat32String    OpType = 123
+	OpStructHeadFloat64String                OpType = 124
+	OpStructHeadOmitEmptyFloat64String       OpType = 125
+	OpStructPtrHeadFloat64String             OpType = 126
+	OpStructPtrHeadOmitEmptyFloat64String    OpType = 127
+	OpStructHeadBoolString                   OpType = 128
+	OpStructHeadOmitEmptyBoolString          OpType = 129
+	OpStructPtrHeadBoolString                OpType = 130
+	OpStructPtrHeadOmitEmptyBoolString       OpType = 131
+	OpStructHeadStringString                 OpType = 132
+	OpStructHeadOmitEmptyStringString        OpType = 133
+	OpStructPtrHeadStringString              OpType = 134
+	OpStructPtrHeadOmitEmptyStringString     OpType = 135
+	OpStructHeadNumberString                 OpType = 136
+	OpStructHeadOmitEmptyNumberString        OpType = 137
+	OpStructPtrHeadNumberString              OpType = 138
+	OpStructPtrHeadOmitEmptyNumberString     OpType = 139
+	OpStructHeadIntPtr                       OpType = 140
+	OpStructHeadOmitEmptyIntPtr              OpType = 141
+	OpStructPtrHeadIntPtr                    OpType = 142
+	OpStructPtrHeadOmitEmptyIntPtr           OpType = 143
+	OpStructHeadUintPtr                      OpType = 144
+	OpStructHeadOmitEmptyUintPtr             OpType = 145
+	OpStructPtrHeadUintPtr                   OpType = 146
+	OpStructPtrHeadOmitEmptyUintPtr          OpType = 147
+	OpStructHeadFloat32Ptr                   OpType = 148
+	OpStructHeadOmitEmptyFloat32Ptr          OpType = 149
+	OpStructPtrHeadFloat32Ptr                OpType = 150
+	OpStructPtrHeadOmitEmptyFloat32Ptr       OpType = 151
+	OpStructHeadFloat64Ptr                   OpType = 152
+	OpStructHeadOmitEmptyFloat64Ptr          OpType = 153
+	OpStructPtrHeadFloat64Ptr                OpType = 154
+	OpStructPtrHeadOmitEmptyFloat64Ptr       OpType = 155
+	OpStructHeadBoolPtr                      OpType = 156
+	OpStructHeadOmitEmptyBoolPtr             OpType = 157
+	OpStructPtrHeadBoolPtr                   OpType = 158
+	OpStructPtrHeadOmitEmptyBoolPtr          OpType = 159
+	OpStructHeadStringPtr                    OpType = 160
+	OpStructHeadOmitEmptyStringPtr           OpType = 161
+	OpStructPtrHeadStringPtr                 OpType = 162
+	OpStructPtrHeadOmitEmptyStringPtr        OpType = 163
+	OpStructHeadBytesPtr                     OpType = 164
+	OpStructHeadOmitEmptyBytesPtr            OpType = 165
+	OpStructPtrHeadBytesPtr                  OpType = 166
+	OpStructPtrHeadOmitEmptyBytesPtr         OpType = 167
+	OpStructHeadNumberPtr                    OpType = 168
+	OpStructHeadOmitEmptyNumberPtr           OpType = 169
+	OpStructPtrHeadNumberPtr                 OpType = 170
+	OpStructPtrHeadOmitEmptyNumberPtr        OpType = 171
+	OpStructHeadArrayPtr                     OpType = 172
+	OpStructHeadOmitEmptyArrayPtr            OpType = 173
+	OpStructPtrHeadArrayPtr                  OpType = 174
+	OpStructPtrHeadOmitEmptyArrayPtr         OpType = 175
+	OpStructHeadMapPtr                       OpType = 176
+	OpStructHeadOmitEmptyMapPtr              OpType = 177
+	OpStructPtrHeadMapPtr                    OpType = 178
+	OpStructPtrHeadOmitEmptyMapPtr           OpType = 179
+	OpStructHeadSlicePtr                     OpType = 180
+	OpStructHeadOmitEmptySlicePtr            OpType = 181
+	OpStructPtrHeadSlicePtr                  OpType = 182
+	OpStructPtrHeadOmitEmptySlicePtr         OpType = 183
+	OpStructHeadMarshalJSONPtr               OpType = 184
+	OpStructHeadOmitEmptyMarshalJSONPtr      OpType = 185
+	OpStructPtrHeadMarshalJSONPtr            OpType = 186
+	OpStructPtrHeadOmitEmptyMarshalJSONPtr   OpType = 187
+	OpStructHeadMarshalTextPtr               OpType = 188
+	OpStructHeadOmitEmptyMarshalTextPtr      OpType = 189
+	OpStructPtrHeadMarshalTextPtr            OpType = 190
+	OpStructPtrHeadOmitEmptyMarshalTextPtr   OpType = 191
+	OpStructHeadInterfacePtr                 OpType = 192
+	OpStructHeadOmitEmptyInterfacePtr        OpType = 193
+	OpStructPtrHeadInterfacePtr              OpType = 194
+	OpStructPtrHeadOmitEmptyInterfacePtr     OpType = 195
+	OpStructHeadIntPtrString                 OpType = 196
+	OpStructHeadOmitEmptyIntPtrString        OpType = 197
+	OpStructPtrHeadIntPtrString              OpType = 198
+	OpStructPtrHeadOmitEmptyIntPtrString     OpType = 199
+	OpStructHeadUintPtrString                OpType = 200
+	OpStructHeadOmitEmptyUintPtrString       OpType = 201
+	OpStructPtrHeadUintPtrString             OpType = 202
+	OpStructPtrHeadOmitEmptyUintPtrString    OpType = 203
+	OpStructHeadFloat32PtrString             OpType = 204
+	OpStructHeadOmitEmptyFloat32PtrString    OpType = 205
+	OpStructPtrHeadFloat32PtrString          OpType = 206
+	OpStructPtrHeadOmitEmptyFloat32PtrString OpType = 207
+	OpStructHeadFloat64PtrString             OpType = 208
+	OpStructHeadOmitEmptyFloat64PtrString    OpType = 209
+	OpStructPtrHeadFloat64PtrString          OpType = 210
+	OpStructPtrHeadOmitEmptyFloat64PtrString OpType = 211
+	OpStructHeadBoolPtrString                OpType = 212
+	OpStructHeadOmitEmptyBoolPtrString       OpType = 213
+	OpStructPtrHeadBoolPtrString             OpType = 214
+	OpStructPtrHeadOmitEmptyBoolPtrString    OpType = 215
+	OpStructHeadStringPtrString              OpType = 216
+	OpStructHeadOmitEmptyStringPtrString     OpType = 217
+	OpStructPtrHeadStringPtrString           OpType = 218
+	OpStructPtrHeadOmitEmptyStringPtrString  OpType = 219
+	OpStructHeadNumberPtrString              OpType = 220
+	OpStructHeadOmitEmptyNumberPtrString     OpType = 221
+	OpStructPtrHeadNumberPtrString           OpType = 222
+	OpStructPtrHeadOmitEmptyNumberPtrString  OpType = 223
+	OpStructHead                             OpType = 224
+	OpStructHeadOmitEmpty                    OpType = 225
+	OpStructPtrHead                          OpType = 226
+	OpStructPtrHeadOmitEmpty                 OpType = 227
+	OpStructFieldInt                         OpType = 228
+	OpStructFieldOmitEmptyInt                OpType = 229
+	OpStructEndInt                           OpType = 230
+	OpStructEndOmitEmptyInt                  OpType = 231
+	OpStructFieldUint                        OpType = 232
+	OpStructFieldOmitEmptyUint               OpType = 233
+	OpStructEndUint                          OpType = 234
+	OpStructEndOmitEmptyUint                 OpType = 235
+	OpStructFieldFloat32                     OpType = 236
+	OpStructFieldOmitEmptyFloat32            OpType = 237
+	OpStructEndFloat32                       OpType = 238
+	OpStructEndOmitEmptyFloat32              OpType = 239
+	OpStructFieldFloat64                     OpType = 240
+	OpStructFieldOmitEmptyFloat64            OpType = 241
+	OpStructEndFloat64                       OpType = 242
+	OpStructEndOmitEmptyFloat64              OpType = 243
+	OpStructFieldBool                        OpType = 244
+	OpStructFieldOmitEmptyBool               OpType = 245
+	OpStructEndBool                          OpType = 246
+	OpStructEndOmitEmptyBool                 OpType = 247
+	OpStructFieldString                      OpType = 248
+	OpStructFieldOmitEmptyString             OpType = 249
+	OpStructEndString                        OpType = 250
+	OpStructEndOmitEmptyString               OpType = 251
+	OpStructFieldBytes                       OpType = 252
+	OpStructFieldOmitEmptyBytes              OpType = 253
+	OpStructEndBytes                         OpType = 254
+	OpStructEndOmitEmptyBytes                OpType = 255
+	OpStructFieldNumber                      OpType = 256
+	OpStructFieldOmitEmptyNumber             OpType = 257
+	OpStructEndNumber                        OpType = 258
+	OpStructEndOmitEmptyNumber               OpType = 259
+	OpStructFieldArray                       OpType = 260
+	OpStructFieldOmitEmptyArray              OpType = 261
+	OpStructEndArray                         OpType = 262
+	OpStructEndOmitEmptyArray                OpType = 263
+	OpStructFieldMap                         OpType = 264
+	OpStructFieldOmitEmptyMap                OpType = 265
+	OpStructEndMap                           OpType = 266
+	OpStructEndOmitEmptyMap                  OpType = 267
+	OpStructFieldSlice                       OpType = 268
+	OpStructFieldOmitEmptySlice              OpType = 269
+	OpStructEndSlice                         OpType = 270
+	OpStructEndOmitEmptySlice                OpType = 271
+	OpStructFieldStruct                      OpType = 272
+	OpStructFieldOmitEmptyStruct             OpType = 273
+	OpStructEndStruct                        OpType = 274
+	OpStructEndOmitEmptyStruct               OpType = 275
+	OpStructFieldMarshalJSON                 OpType = 276
+	OpStructFieldOmitEmptyMarshalJSON        OpType = 277
+	OpStructEndMarshalJSON                   OpType = 278
+	OpStructEndOmitEmptyMarshalJSON          OpType = 279
+	OpStructFieldMarshalText                 OpType = 280
+	OpStructFieldOmitEmptyMarshalText        OpType = 281
+	OpStructEndMarshalText                   OpType = 282
+	OpStructEndOmitEmptyMarshalText          OpType = 283
+	OpStructFieldIntString                   OpType = 284
+	OpStructFieldOmitEmptyIntString          OpType = 285
+	OpStructEndIntString                     OpType = 286
+	OpStructEndOmitEmptyIntString            OpType = 287
+	OpStructFieldUintString                  OpType = 288
+	OpStructFieldOmitEmptyUintString         OpType = 289
+	OpStructEndUintString                    OpType = 290
+	OpStructEndOmitEmptyUintString           OpType = 291
+	OpStructFieldFloat32String               OpType = 292
+	OpStructFieldOmitEmptyFloat32String      OpType = 293
+	OpStructEndFloat32String                 OpType = 294
+	OpStructEndOmitEmptyFloat32String        OpType = 295
+	OpStructFieldFloat64String               OpType = 296
+	OpStructFieldOmitEmptyFloat64String      OpType = 297
+	OpStructEndFloat64String                 OpType = 298
+	OpStructEndOmitEmptyFloat64String        OpType = 299
+	OpStructFieldBoolString                  OpType = 300
+	OpStructFieldOmitEmptyBoolString         OpType = 301
+	OpStructEndBoolString                    OpType = 302
+	OpStructEndOmitEmptyBoolString           OpType = 303
+	OpStructFieldStringString                OpType = 304
+	OpStructFieldOmitEmptyStringString       OpType = 305
+	OpStructEndStringString                  OpType = 306
+	OpStructEndOmitEmptyStringString         OpType = 307
+	OpStructFieldNumberString                OpType = 308
+	OpStructFieldOmitEmptyNumberString       OpType = 309
+	OpStructEndNumberString                  OpType = 310
+	OpStructEndOmitEmptyNumberString         OpType = 311
+	OpStructFieldIntPtr                      OpType = 312
+	OpStructFieldOmitEmptyIntPtr             OpType = 313
+	OpStructEndIntPtr                        OpType = 314
+	OpStructEndOmitEmptyIntPtr               OpType = 315
+	OpStructFieldUintPtr                     OpType = 316
+	OpStructFieldOmitEmptyUintPtr            OpType = 317
+	OpStructEndUintPtr                       OpType = 318
+	OpStructEndOmitEmptyUintPtr              OpType = 319
+	OpStructFieldFloat32Ptr                  OpType = 320
+	OpStructFieldOmitEmptyFloat32Ptr         OpType = 321
+	OpStructEndFloat32Ptr                    OpType = 322
+	OpStructEndOmitEmptyFloat32Ptr           OpType = 323
+	OpStructFieldFloat64Ptr                  OpType = 324
+	OpStructFieldOmitEmptyFloat64Ptr         OpType = 325
+	OpStructEndFloat64Ptr                    OpType = 326
+	OpStructEndOmitEmptyFloat64Ptr           OpType = 327
+	OpStructFieldBoolPtr                     OpType = 328
+	OpStructFieldOmitEmptyBoolPtr            OpType = 329
+	OpStructEndBoolPtr                       OpType = 330
+	OpStructEndOmitEmptyBoolPtr              OpType = 331
+	OpStructFieldStringPtr                   OpType = 332
+	OpStructFieldOmitEmptyStringPtr          OpType = 333
+	OpStructEndStringPtr                     OpType = 334
+	OpStructEndOmitEmptyStringPtr            OpType = 335
+	OpStructFieldBytesPtr                    OpType = 336
+	OpStructFieldOmitEmptyBytesPtr           OpType = 337
+	OpStructEndBytesPtr                      OpType = 338
+	OpStructEndOmitEmptyBytesPtr             OpType = 339
+	OpStructFieldNumberPtr                   OpType = 340
+	OpStructFieldOmitEmptyNumberPtr          OpType = 341
+	OpStructEndNumberPtr                     OpType = 342
+	OpStructEndOmitEmptyNumberPtr            OpType = 343
+	OpStructFieldArrayPtr                    OpType = 344
+	OpStructFieldOmitEmptyArrayPtr           OpType = 345
+	OpStructEndArrayPtr                      OpType = 346
+	OpStructEndOmitEmptyArrayPtr             OpType = 347
+	OpStructFieldMapPtr                      OpType = 348
+	OpStructFieldOmitEmptyMapPtr             OpType = 349
+	OpStructEndMapPtr                        OpType = 350
+	OpStructEndOmitEmptyMapPtr               OpType = 351
+	OpStructFieldSlicePtr                    OpType = 352
+	OpStructFieldOmitEmptySlicePtr           OpType = 353
+	OpStructEndSlicePtr                      OpType = 354
+	OpStructEndOmitEmptySlicePtr             OpType = 355
+	OpStructFieldMarshalJSONPtr              OpType = 356
+	OpStructFieldOmitEmptyMarshalJSONPtr     OpType = 357
+	OpStructEndMarshalJSONPtr                OpType = 358
+	OpStructEndOmitEmptyMarshalJSONPtr       OpType = 359
+	OpStructFieldMarshalTextPtr              OpType = 360
+	OpStructFieldOmitEmptyMarshalTextPtr     OpType = 361
+	OpStructEndMarshalTextPtr                OpType = 362
+	OpStructEndOmitEmptyMarshalTextPtr       OpType = 363
+	OpStructFieldInterfacePtr                OpType = 364
+	OpStructFieldOmitEmptyInterfacePtr       OpType = 365
+	OpStructEndInterfacePtr                  OpType = 366
+	OpStructEndOmitEmptyInterfacePtr         OpType = 367
+	OpStructFieldIntPtrString                OpType = 368
+	OpStructFieldOmitEmptyIntPtrString       OpType = 369
+	OpStructEndIntPtrString                  OpType = 370
+	OpStructEndOmitEmptyIntPtrString         OpType = 371
+	OpStructFieldUintPtrString               OpType = 372
+	OpStructFieldOmitEmptyUintPtrString      OpType = 373
+	OpStructEndUintPtrString                 OpType = 374
+	OpStructEndOmitEmptyUintPtrString        OpType = 375
+	OpStructFieldFloat32PtrString            OpType = 376
+	OpStructFieldOmitEmptyFloat32PtrString   OpType = 377
+	OpStructEndFloat32PtrString              OpType = 378
+	OpStructEndOmitEmptyFloat32PtrString     OpType = 379
+	OpStructFieldFloat64PtrString            OpType = 380
+	OpStructFieldOmitEmptyFloat64PtrString   OpType = 381
+	OpStructEndFloat64PtrString              OpType = 382
+	OpStructEndOmitEmptyFloat64PtrString     OpType = 383
+	OpStructFieldBoolPtrString               OpType = 384
+	OpStructFieldOmitEmptyBoolPtrString      OpType = 385
+	OpStructEndBoolPtrString                 OpType = 386
+	OpStructEndOmitEmptyBoolPtrString        OpType = 387
+	OpStructFieldStringPtrString             OpType = 388
+	OpStructFieldOmitEmptyStringPtrString    OpType = 389
+	OpStructEndStringPtrString               OpType = 390
+	OpStructEndOmitEmptyStringPtrString      OpType = 391
+	OpStructFieldNumberPtrString             OpType = 392
+	OpStructFieldOmitEmptyNumberPtrString    OpType = 393
+	OpStructEndNumberPtrString               OpType = 394
+	OpStructEndOmitEmptyNumberPtrString      OpType = 395
+	OpStructField                            OpType = 396
+	OpStructFieldOmitEmpty                   OpType = 397
+	OpStructEnd                              OpType = 398
+	OpStructEndOmitEmpty                     OpType = 399
+)
+
+func (t OpType) String() string { // name from opTypeStrings, or "" for values of 400 and above
+	if idx := int(t); idx < 400 {
+		return opTypeStrings[idx]
+	}
+	return ""
+}
+
+func (t OpType) CodeType() CodeType { // coarse Code* category, derived from the opcode's name or value
+	if name := t.String(); strings.Contains(name, "Struct") {
+		if strings.Contains(name, "End") {
+			return CodeStructEnd
+		}
+		return CodeStructField
+	}
+	switch t {
+	case OpArray, OpArrayPtr:
+		return CodeArrayHead
+	case OpArrayElem:
+		return CodeArrayElem
+	case OpSlice, OpSlicePtr:
+		return CodeSliceHead
+	case OpSliceElem:
+		return CodeSliceElem
+	case OpMap, OpMapPtr:
+		return CodeMapHead
+	case OpMapKey:
+		return CodeMapKey
+	case OpMapValue:
+		return CodeMapValue
+	case OpMapEnd:
+		return CodeMapEnd
+	default: // no struct or container category matched
+		return CodeOp
+	}
+}
+
+func (t OpType) HeadToPtrHead() OpType { // "...Head<X>" -> "...PtrHead<X>", found two table entries later
+	const toPtrOffset = 2
+	name := t.String()
+	if strings.Index(name, "PtrHead") > 0 {
+		return t // already a PtrHead opcode
+	}
+	idx := strings.Index(name, "Head")
+	if idx == -1 {
+		return t // no "Head" in the name, nothing to convert
+	}
+	want := "PtrHead" + name[idx+len("Head"):]
+	ptrOp := OpType(int(t) + toPtrOffset)
+	if !strings.Contains(ptrOp.String(), want) {
+		return t // the entry two slots on is not the expected counterpart
+	}
+	return ptrOp
+}
+
+func (t OpType) HeadToOmitEmptyHead() OpType { // the next table entry, if its name contains "OmitEmpty"
+	const toOmitEmptyOffset = 1
+	next := OpType(int(t) + toOmitEmptyOffset)
+	if !strings.Contains(next.String(), "OmitEmpty") {
+		return t // no OmitEmpty variant directly after t
+	}
+	return next
+}
+
+func (t OpType) PtrHeadToHead() OpType { // "...PtrHead<X>" -> "...Head<X>", found two table entries earlier
+	const toPtrOffset = 2
+	name := t.String()
+	idx := strings.Index(name, "PtrHead")
+	if idx == -1 {
+		return t // not a PtrHead opcode
+	}
+	headOp := OpType(int(t) - toPtrOffset)
+	if !strings.Contains(headOp.String(), name[idx+len("Ptr"):]) {
+		return t
+	}
+	return headOp
+}
+
+func (t OpType) FieldToEnd() OpType { // "...Field<X>" -> "...End<X>", found two table entries later
+	const toEndOffset = 2
+	name := t.String()
+	idx := strings.Index(name, "Field")
+	if idx == -1 {
+		return t
+	}
+	suffix := name[idx+len("Field"):]
+	endOp := OpType(int(t) + toEndOffset)
+	typed := suffix != "" && suffix != "OmitEmpty" // bare StructField / StructFieldOmitEmpty stay as-is
+	if typed && strings.Contains(endOp.String(), "End"+suffix) {
+		return endOp
+	}
+	return t
+}
+
+func (t OpType) FieldToOmitEmptyField() OpType { // the next table entry, if its name contains "OmitEmpty"
+	const toOmitEmptyOffset = 1
+	if next := OpType(int(t) + toOmitEmptyOffset); strings.Contains(next.String(), "OmitEmpty") {
+		return next
+	}
+	return t
+}
diff --git a/vendor/github.com/goccy/go-json/internal/encoder/query.go b/vendor/github.com/goccy/go-json/internal/encoder/query.go
new file mode 100644
index 00000000..1e1850cc
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/internal/encoder/query.go
@@ -0,0 +1,135 @@
+package encoder
+
+import (
+	"context"
+	"fmt"
+	"reflect"
+)
+
+var ( // function hooks; no value is assigned in this file — presumably set by the importing package, TODO confirm
+	Marshal   func(interface{}) ([]byte, error) // encode hook used by FieldQuery.Hash, MarshalJSON and QueryString
+	Unmarshal func([]byte, interface{}) error   // decode hook used by FieldQueryString.Build and buildString
+)
+
+type FieldQuery struct { // one node of a (possibly nested) field query
+	Name   string        // node name; when empty, MarshalJSON emits only Fields
+	Fields []*FieldQuery // child queries
+	hash   string        // cached result of Hash; "" means not computed yet
+}
+
+func (q *FieldQuery) Hash() string { // Marshal output for q, computed once and cached in q.hash
+	if q.hash == "" {
+		// The Marshal error is discarded; whatever bytes come back are cached.
+		encoded, _ := Marshal(q)
+		q.hash = string(encoded)
+	}
+	return q.hash
+}
+
+func (q *FieldQuery) MarshalJSON() ([]byte, error) { // encodes as name, {name: fields} or fields
+	if q.Name == "" {
+		return Marshal(q.Fields) // unnamed node: only the children
+	}
+	if len(q.Fields) == 0 {
+		return Marshal(q.Name) // named node without children: the bare name
+	}
+	return Marshal(map[string][]*FieldQuery{q.Name: q.Fields})
+}
+
+func (q *FieldQuery) QueryString() (FieldQueryString, error) { // q passed through Marshal, as a string type
+	encoded, err := Marshal(q)
+	if err == nil {
+		return FieldQueryString(encoded), nil
+	}
+	return "", err
+}
+
+type FieldQueryString string // encoded form of a FieldQuery: produced by QueryString, parsed back by Build
+
+func (s FieldQueryString) Build() (*FieldQuery, error) { // decode s via Unmarshal, then convert the generic value
+	var decoded interface{}
+	if err := Unmarshal([]byte(s), &decoded); err != nil {
+		return nil, err
+	}
+	return s.build(reflect.ValueOf(decoded))
+}
+
+func (s FieldQueryString) build(v reflect.Value) (*FieldQuery, error) { // convert one decoded value by kind
+	switch v.Kind() { // Kind, unlike Type, is safe on the zero Value (e.g. reflect.ValueOf(nil)): it yields Invalid
+	case reflect.String:
+		return s.buildString(v)
+	case reflect.Map:
+		return s.buildMap(v)
+	case reflect.Slice:
+		return s.buildSlice(v)
+	case reflect.Interface: // unwrap the dynamic value and dispatch again
+		return s.build(reflect.ValueOf(v.Interface()))
+	}
+	return nil, fmt.Errorf("failed to build field query") // reached for Invalid and every other unsupported kind
+}
+
+func (s FieldQueryString) buildString(v reflect.Value) (*FieldQuery, error) { // plain name, or nested encoded query
+	b := []byte(v.String())
+	// Guard the first-byte peek: an empty string has no b[0] and is kept as a plain (empty) name.
+	if len(b) > 0 && (b[0] == '[' || b[0] == '{') {
+		var query interface{}
+		if err := Unmarshal(b, &query); err != nil {
+			return nil, err
+		}
+		if str, ok := query.(string); ok {
+			return &FieldQuery{Name: str}, nil
+		}
+		return s.build(reflect.ValueOf(query))
+	}
+	return &FieldQuery{Name: string(b)}, nil
+}
+
+func (s FieldQueryString) buildSlice(v reflect.Value) (*FieldQuery, error) { // one child query per element
+	children := make([]*FieldQuery, v.Len())
+	for i := range children {
+		child, err := s.build(v.Index(i))
+		if err != nil {
+			return nil, err // the first failing element aborts the whole slice
+		}
+		children[i] = child
+	}
+	return &FieldQuery{Fields: children}, nil
+}
+
+func (s FieldQueryString) buildMap(v reflect.Value) (*FieldQuery, error) { // single-key object -> named node
+	entries := v.MapKeys()
+	if len(entries) != 1 {
+		return nil, fmt.Errorf("failed to build field query object")
+	}
+	nameKey := entries[0]
+	if nameKey.Kind() != reflect.String {
+		return nil, fmt.Errorf("failed to build field query. invalid object key type")
+	}
+	sub, err := s.build(v.MapIndex(nameKey))
+	if err != nil {
+		return nil, err
+	}
+	// Only the sub-query's Fields are adopted; its own Name is not carried over.
+	return &FieldQuery{
+		Name:   nameKey.String(),
+		Fields: sub.Fields,
+	}, nil
+}
+
+type queryKey struct{} // unexported context key type used by FieldQueryFromContext and SetFieldQueryToContext
+
+func FieldQueryFromContext(ctx context.Context) *FieldQuery {
+	// A single comma-ok assertion covers both "no value stored" (nil
+	// interface) and "value of another type": in either case ok is false
+	// and nil is returned. A stored *FieldQuery is returned unchanged.
+	stored := ctx.Value(queryKey{})
+	q, ok := stored.(*FieldQuery)
+	if !ok {
+		return nil
+	}
+	return q
+}
+
+func SetFieldQueryToContext(ctx context.Context, query *FieldQuery) context.Context { // derived ctx carrying query
+	return context.WithValue(ctx, queryKey{}, query) // stored under queryKey{}, the key FieldQueryFromContext reads
+}
diff --git a/vendor/github.com/goccy/go-json/internal/encoder/string.go b/vendor/github.com/goccy/go-json/internal/encoder/string.go
new file mode 100644
index 00000000..4abb8416
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/internal/encoder/string.go
@@ -0,0 +1,483 @@
+// This file's string processing code is inspired by https://github.com/segmentio/encoding.
+// The license notation is as follows.
+//
+// # MIT License
+//
+// Copyright (c) 2019 Segment.io, Inc.
+//
+// Permission is hereby granted, free of charge, to any person obtaining a copy
+// of this software and associated documentation files (the "Software"), to deal
+// in the Software without restriction, including without limitation the rights
+// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the Software is
+// furnished to do so, subject to the following conditions:
+//
+// The above copyright notice and this permission notice shall be included in all
+// copies or substantial portions of the Software.
+//
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+// SOFTWARE.
+package encoder
+
+import (
+	"math/bits"
+	"reflect"
+	"unsafe"
+)
+// lsb has the lowest bit and msb the highest bit of every byte of a 64-bit word set; the chunk scans below use them for byte-parallel tests.
+const (
+	lsb = 0x0101010101010101
+	msb = 0x8080808080808080
+)
+// hex holds the lower-case hexadecimal digits used to write the last two digits of \u00XX escapes.
+var hex = "0123456789abcdef"
+// stringToUint64Slice views the bytes of s as len(s)/8 uint64 words without copying; the trailing len(s)%8 bytes are not covered.
+//nolint:govet
+func stringToUint64Slice(s string) []uint64 {
+	return *(*[]uint64)(unsafe.Pointer(&reflect.SliceHeader{
+		Data: ((*reflect.StringHeader)(unsafe.Pointer(&s))).Data,
+		Len:  len(s) / 8,
+		Cap:  len(s) / 8,
+	}))
+}
+// AppendString appends s to buf as a quoted JSON string, picking the escaper that matches the HTML-escape and UTF-8-normalize bits of ctx.Option.Flag.
+func AppendString(ctx *RuntimeContext, buf []byte, s string) []byte {
+	html, norm := ctx.Option.Flag&HTMLEscapeOption != 0, ctx.Option.Flag&NormalizeUTF8Option != 0
+	switch {
+	case html && norm:
+		return appendNormalizedHTMLString(buf, s)
+	case html:
+		return appendHTMLString(buf, s)
+	case norm:
+		return appendNormalizedString(buf, s)
+	}
+	return appendString(buf, s)
+}
+// appendNormalizedHTMLString appends s to buf as a quoted JSON string, escaping control bytes, '"', '\\', '<', '>' and '&', and rewriting what decodeRuneInString reports as an error (\ufffd) or as U+2028/U+2029.
+func appendNormalizedHTMLString(buf []byte, s string) []byte {
+	valLen := len(s)
+	if valLen == 0 {
+		return append(buf, `""`...)
+	}
+	buf = append(buf, '"')
+	var (
+		i, j int // i: start of the run not yet copied to buf; j: scan cursor
+	)
+	if valLen >= 8 {
+		chunks := stringToUint64Slice(s)
+		for idx, n := range chunks {
+			// combine masks before checking for the MSB of each byte. We include
+			// `n` in the mask to check whether any of the *input* byte MSBs were
+			// set (i.e. the byte was outside the ASCII range).
+			mask := n | (n - (lsb * 0x20)) |
+				((n ^ (lsb * '"')) - lsb) |
+				((n ^ (lsb * '\\')) - lsb) |
+				((n ^ (lsb * '<')) - lsb) |
+				((n ^ (lsb * '>')) - lsb) |
+				((n ^ (lsb * '&')) - lsb)
+			if (mask & msb) != 0 {
+				j = idx*8 + bits.TrailingZeros64(mask&msb)/8 // absolute offset, so earlier clean chunks are not rescanned; NOTE(review): byte order assumed little-endian
+				goto ESCAPE_END
+			}
+		}
+		for k := len(chunks) * 8; k < valLen; k++ {
+			if needEscapeHTMLNormalizeUTF8[s[k]] {
+				j = k
+				goto ESCAPE_END
+			}
+		}
+		// no character needs escaping: copy s verbatim.
+		return append(append(buf, s...), '"')
+	}
+ESCAPE_END:
+	for j < valLen {
+		c := s[j]
+
+		if !needEscapeHTMLNormalizeUTF8[c] {
+			// fast path: most of the time, printable ascii characters are used
+			j++
+			continue
+		}
+
+		switch c {
+		case '\\', '"':
+			buf = append(buf, s[i:j]...)
+			buf = append(buf, '\\', c)
+			i = j + 1
+			j = j + 1
+			continue
+
+		case '\n':
+			buf = append(buf, s[i:j]...)
+			buf = append(buf, '\\', 'n')
+			i = j + 1
+			j = j + 1
+			continue
+
+		case '\r':
+			buf = append(buf, s[i:j]...)
+			buf = append(buf, '\\', 'r')
+			i = j + 1
+			j = j + 1
+			continue
+
+		case '\t':
+			buf = append(buf, s[i:j]...)
+			buf = append(buf, '\\', 't')
+			i = j + 1
+			j = j + 1
+			continue
+
+		case '<', '>', '&':
+			buf = append(buf, s[i:j]...)
+			buf = append(buf, `\u00`...)
+			buf = append(buf, hex[c>>4], hex[c&0xF])
+			i = j + 1
+			j = j + 1
+			continue
+
+		case 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x0B, 0x0C, 0x0E, 0x0F, // 0x00-0x0F
+			0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F: // 0x10-0x1F
+			buf = append(buf, s[i:j]...)
+			buf = append(buf, `\u00`...)
+			buf = append(buf, hex[c>>4], hex[c&0xF])
+			i = j + 1
+			j = j + 1
+			continue
+		}
+		state, size := decodeRuneInString(s[j:])
+		switch state {
+		case runeErrorState:
+			buf = append(buf, s[i:j]...)
+			buf = append(buf, `\ufffd`...)
+			i = j + 1
+			j = j + 1
+			continue
+			// U+2028 is LINE SEPARATOR.
+			// U+2029 is PARAGRAPH SEPARATOR.
+			// They are both technically valid characters in JSON strings,
+			// but don't work in JSONP, which has to be evaluated as JavaScript,
+			// and can lead to security holes there. It is valid JSON to
+			// escape them, so we do so unconditionally.
+			// See http://timelessrepo.com/json-isnt-a-javascript-subset for discussion.
+		case lineSepState:
+			buf = append(buf, s[i:j]...)
+			buf = append(buf, `\u2028`...)
+			i = j + 3
+			j = j + 3
+			continue
+		case paragraphSepState:
+			buf = append(buf, s[i:j]...)
+			buf = append(buf, `\u2029`...)
+			i = j + 3
+			j = j + 3
+			continue
+		}
+		j += size
+	}
+
+	return append(append(buf, s[i:]...), '"')
+}
+// appendHTMLString appends s to buf as a quoted JSON string, escaping control bytes, '"', '\\', '<', '>' and '&'; bytes >= 0x80 are copied through unchanged.
+func appendHTMLString(buf []byte, s string) []byte {
+	valLen := len(s)
+	if valLen == 0 {
+		return append(buf, `""`...)
+	}
+	buf = append(buf, '"')
+	var (
+		i, j int // i: start of the run not yet copied to buf; j: scan cursor
+	)
+	if valLen >= 8 {
+		chunks := stringToUint64Slice(s)
+		for idx, n := range chunks {
+			// combine masks before checking for the MSB of each byte. We include
+			// `n` in the mask to check whether any of the *input* byte MSBs were
+			// set (i.e. the byte was outside the ASCII range).
+			mask := n | (n - (lsb * 0x20)) |
+				((n ^ (lsb * '"')) - lsb) |
+				((n ^ (lsb * '\\')) - lsb) |
+				((n ^ (lsb * '<')) - lsb) |
+				((n ^ (lsb * '>')) - lsb) |
+				((n ^ (lsb * '&')) - lsb)
+			if (mask & msb) != 0 {
+				j = idx*8 + bits.TrailingZeros64(mask&msb)/8 // absolute offset, so earlier clean chunks are not rescanned; NOTE(review): byte order assumed little-endian
+				goto ESCAPE_END
+			}
+		}
+		for k := len(chunks) * 8; k < valLen; k++ {
+			if needEscapeHTML[s[k]] {
+				j = k
+				goto ESCAPE_END
+			}
+		}
+		// no character needs escaping: copy s verbatim.
+		return append(append(buf, s...), '"')
+	}
+ESCAPE_END:
+	for j < valLen {
+		c := s[j]
+
+		if !needEscapeHTML[c] {
+			// fast path: most of the time, printable ascii characters are used
+			j++
+			continue
+		}
+
+		switch c {
+		case '\\', '"':
+			buf = append(buf, s[i:j]...)
+			buf = append(buf, '\\', c)
+			i = j + 1
+			j = j + 1
+			continue
+
+		case '\n':
+			buf = append(buf, s[i:j]...)
+			buf = append(buf, '\\', 'n')
+			i = j + 1
+			j = j + 1
+			continue
+
+		case '\r':
+			buf = append(buf, s[i:j]...)
+			buf = append(buf, '\\', 'r')
+			i = j + 1
+			j = j + 1
+			continue
+
+		case '\t':
+			buf = append(buf, s[i:j]...)
+			buf = append(buf, '\\', 't')
+			i = j + 1
+			j = j + 1
+			continue
+
+		case '<', '>', '&':
+			buf = append(buf, s[i:j]...)
+			buf = append(buf, `\u00`...)
+			buf = append(buf, hex[c>>4], hex[c&0xF])
+			i = j + 1
+			j = j + 1
+			continue
+
+		case 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x0B, 0x0C, 0x0E, 0x0F, // 0x00-0x0F
+			0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F: // 0x10-0x1F
+			buf = append(buf, s[i:j]...)
+			buf = append(buf, `\u00`...)
+			buf = append(buf, hex[c>>4], hex[c&0xF])
+			i = j + 1
+			j = j + 1
+			continue
+		}
+		j++
+	}
+
+	return append(append(buf, s[i:]...), '"')
+}
+// appendNormalizedString appends s to buf as a quoted JSON string, escaping control bytes, '"' and '\\', and rewriting what decodeRuneInString reports as an error (\ufffd) or as U+2028/U+2029.
+func appendNormalizedString(buf []byte, s string) []byte {
+	valLen := len(s)
+	if valLen == 0 {
+		return append(buf, `""`...)
+	}
+	buf = append(buf, '"')
+	var (
+		i, j int // i: start of the run not yet copied to buf; j: scan cursor
+	)
+	if valLen >= 8 {
+		chunks := stringToUint64Slice(s)
+		for idx, n := range chunks {
+			// combine masks before checking for the MSB of each byte. We include
+			// `n` in the mask to check whether any of the *input* byte MSBs were
+			// set (i.e. the byte was outside the ASCII range).
+			mask := n | (n - (lsb * 0x20)) |
+				((n ^ (lsb * '"')) - lsb) |
+				((n ^ (lsb * '\\')) - lsb)
+			if (mask & msb) != 0 {
+				j = idx*8 + bits.TrailingZeros64(mask&msb)/8 // absolute offset, so earlier clean chunks are not rescanned; NOTE(review): byte order assumed little-endian
+				goto ESCAPE_END
+			}
+		}
+		// check the trailing bytes that do not fill a whole 8-byte chunk.
+		for k := len(chunks) * 8; k < valLen; k++ {
+			if needEscapeNormalizeUTF8[s[k]] {
+				j = k
+				goto ESCAPE_END
+			}
+		}
+		return append(append(buf, s...), '"')
+	}
+ESCAPE_END:
+	for j < valLen {
+		c := s[j]
+
+		if !needEscapeNormalizeUTF8[c] {
+			// fast path: most of the time, printable ascii characters are used
+			j++
+			continue
+		}
+
+		switch c {
+		case '\\', '"':
+			buf = append(buf, s[i:j]...)
+			buf = append(buf, '\\', c)
+			i = j + 1
+			j = j + 1
+			continue
+
+		case '\n':
+			buf = append(buf, s[i:j]...)
+			buf = append(buf, '\\', 'n')
+			i = j + 1
+			j = j + 1
+			continue
+
+		case '\r':
+			buf = append(buf, s[i:j]...)
+			buf = append(buf, '\\', 'r')
+			i = j + 1
+			j = j + 1
+			continue
+
+		case '\t':
+			buf = append(buf, s[i:j]...)
+			buf = append(buf, '\\', 't')
+			i = j + 1
+			j = j + 1
+			continue
+
+		case 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x0B, 0x0C, 0x0E, 0x0F, // 0x00-0x0F
+			0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F: // 0x10-0x1F
+			buf = append(buf, s[i:j]...)
+			buf = append(buf, `\u00`...)
+			buf = append(buf, hex[c>>4], hex[c&0xF])
+			i = j + 1
+			j = j + 1
+			continue
+		}
+
+		state, size := decodeRuneInString(s[j:])
+		switch state {
+		case runeErrorState:
+			buf = append(buf, s[i:j]...)
+			buf = append(buf, `\ufffd`...)
+			i = j + 1
+			j = j + 1
+			continue
+			// U+2028 is LINE SEPARATOR.
+			// U+2029 is PARAGRAPH SEPARATOR.
+			// They are both technically valid characters in JSON strings,
+			// but don't work in JSONP, which has to be evaluated as JavaScript,
+			// and can lead to security holes there. It is valid JSON to
+			// escape them, so we do so unconditionally.
+			// See http://timelessrepo.com/json-isnt-a-javascript-subset for discussion.
+		case lineSepState:
+			buf = append(buf, s[i:j]...)
+			buf = append(buf, `\u2028`...)
+			i = j + 3
+			j = j + 3
+			continue
+		case paragraphSepState:
+			buf = append(buf, s[i:j]...)
+			buf = append(buf, `\u2029`...)
+			i = j + 3
+			j = j + 3
+			continue
+		}
+		j += size
+	}
+
+	return append(append(buf, s[i:]...), '"')
+}
+// appendString appends s to buf as a quoted JSON string, escaping only control bytes, '"' and '\\'; bytes >= 0x80 are copied through unchanged.
+func appendString(buf []byte, s string) []byte {
+	valLen := len(s)
+	if valLen == 0 {
+		return append(buf, `""`...)
+	}
+	buf = append(buf, '"')
+	var (
+		i, j int // i: start of the run not yet copied to buf; j: scan cursor
+	)
+	if valLen >= 8 {
+		chunks := stringToUint64Slice(s)
+		for idx, n := range chunks {
+			// combine masks before checking for the MSB of each byte. We include
+			// `n` in the mask to check whether any of the *input* byte MSBs were
+			// set (i.e. the byte was outside the ASCII range).
+			mask := n | (n - (lsb * 0x20)) |
+				((n ^ (lsb * '"')) - lsb) |
+				((n ^ (lsb * '\\')) - lsb)
+			if (mask & msb) != 0 {
+				j = idx*8 + bits.TrailingZeros64(mask&msb)/8 // absolute offset, so earlier clean chunks are not rescanned; NOTE(review): byte order assumed little-endian
+				goto ESCAPE_END
+			}
+		}
+		// check the trailing bytes that do not fill a whole 8-byte chunk.
+		for k := len(chunks) * 8; k < valLen; k++ {
+			if needEscape[s[k]] {
+				j = k
+				goto ESCAPE_END
+			}
+		}
+		return append(append(buf, s...), '"')
+	}
+ESCAPE_END:
+	for j < valLen {
+		c := s[j]
+
+		if !needEscape[c] {
+			// fast path: most of the time, printable ascii characters are used
+			j++
+			continue
+		}
+
+		switch c {
+		case '\\', '"':
+			buf = append(buf, s[i:j]...)
+			buf = append(buf, '\\', c)
+			i = j + 1
+			j = j + 1
+			continue
+
+		case '\n':
+			buf = append(buf, s[i:j]...)
+			buf = append(buf, '\\', 'n')
+			i = j + 1
+			j = j + 1
+			continue
+
+		case '\r':
+			buf = append(buf, s[i:j]...)
+			buf = append(buf, '\\', 'r')
+			i = j + 1
+			j = j + 1
+			continue
+
+		case '\t':
+			buf = append(buf, s[i:j]...)
+			buf = append(buf, '\\', 't')
+			i = j + 1
+			j = j + 1
+			continue
+
+		case 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x0B, 0x0C, 0x0E, 0x0F, // 0x00-0x0F
+			0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F: // 0x10-0x1F
+			buf = append(buf, s[i:j]...)
+			buf = append(buf, `\u00`...)
+			buf = append(buf, hex[c>>4], hex[c&0xF])
+			i = j + 1
+			j = j + 1
+			continue
+		}
+		j++
+	}
+
+	return append(append(buf, s[i:]...), '"')
+}
diff --git a/vendor/github.com/goccy/go-json/internal/encoder/string_table.go b/vendor/github.com/goccy/go-json/internal/encoder/string_table.go
new file mode 100644
index 00000000..ebe42c92
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/internal/encoder/string_table.go
@@ -0,0 +1,415 @@
+package encoder
+// needEscapeHTMLNormalizeUTF8 is indexed by byte value and is true for 0x00-0x1f, '"', '\\', '&', '<', '>' and every byte >= 0x80.
+var needEscapeHTMLNormalizeUTF8 = [256]bool{
+	'"':  true,
+	'&':  true,
+	'<':  true,
+	'>':  true,
+	'\\': true,
+	0x00: true,
+	0x01: true,
+	0x02: true,
+	0x03: true,
+	0x04: true,
+	0x05: true,
+	0x06: true,
+	0x07: true,
+	0x08: true,
+	0x09: true,
+	0x0a: true,
+	0x0b: true,
+	0x0c: true,
+	0x0d: true,
+	0x0e: true,
+	0x0f: true,
+	0x10: true,
+	0x11: true,
+	0x12: true,
+	0x13: true,
+	0x14: true,
+	0x15: true,
+	0x16: true,
+	0x17: true,
+	0x18: true,
+	0x19: true,
+	0x1a: true,
+	0x1b: true,
+	0x1c: true,
+	0x1d: true,
+	0x1e: true,
+	0x1f: true,
+	/* 0x20 - 0x7f */
+	0x80: true,
+	0x81: true,
+	0x82: true,
+	0x83: true,
+	0x84: true,
+	0x85: true,
+	0x86: true,
+	0x87: true,
+	0x88: true,
+	0x89: true,
+	0x8a: true,
+	0x8b: true,
+	0x8c: true,
+	0x8d: true,
+	0x8e: true,
+	0x8f: true,
+	0x90: true,
+	0x91: true,
+	0x92: true,
+	0x93: true,
+	0x94: true,
+	0x95: true,
+	0x96: true,
+	0x97: true,
+	0x98: true,
+	0x99: true,
+	0x9a: true,
+	0x9b: true,
+	0x9c: true,
+	0x9d: true,
+	0x9e: true,
+	0x9f: true,
+	0xa0: true,
+	0xa1: true,
+	0xa2: true,
+	0xa3: true,
+	0xa4: true,
+	0xa5: true,
+	0xa6: true,
+	0xa7: true,
+	0xa8: true,
+	0xa9: true,
+	0xaa: true,
+	0xab: true,
+	0xac: true,
+	0xad: true,
+	0xae: true,
+	0xaf: true,
+	0xb0: true,
+	0xb1: true,
+	0xb2: true,
+	0xb3: true,
+	0xb4: true,
+	0xb5: true,
+	0xb6: true,
+	0xb7: true,
+	0xb8: true,
+	0xb9: true,
+	0xba: true,
+	0xbb: true,
+	0xbc: true,
+	0xbd: true,
+	0xbe: true,
+	0xbf: true,
+	0xc0: true,
+	0xc1: true,
+	0xc2: true,
+	0xc3: true,
+	0xc4: true,
+	0xc5: true,
+	0xc6: true,
+	0xc7: true,
+	0xc8: true,
+	0xc9: true,
+	0xca: true,
+	0xcb: true,
+	0xcc: true,
+	0xcd: true,
+	0xce: true,
+	0xcf: true,
+	0xd0: true,
+	0xd1: true,
+	0xd2: true,
+	0xd3: true,
+	0xd4: true,
+	0xd5: true,
+	0xd6: true,
+	0xd7: true,
+	0xd8: true,
+	0xd9: true,
+	0xda: true,
+	0xdb: true,
+	0xdc: true,
+	0xdd: true,
+	0xde: true,
+	0xdf: true,
+	0xe0: true,
+	0xe1: true,
+	0xe2: true,
+	0xe3: true,
+	0xe4: true,
+	0xe5: true,
+	0xe6: true,
+	0xe7: true,
+	0xe8: true,
+	0xe9: true,
+	0xea: true,
+	0xeb: true,
+	0xec: true,
+	0xed: true,
+	0xee: true,
+	0xef: true,
+	0xf0: true,
+	0xf1: true,
+	0xf2: true,
+	0xf3: true,
+	0xf4: true,
+	0xf5: true,
+	0xf6: true,
+	0xf7: true,
+	0xf8: true,
+	0xf9: true,
+	0xfa: true,
+	0xfb: true,
+	0xfc: true,
+	0xfd: true,
+	0xfe: true,
+	0xff: true,
+}
+// needEscapeNormalizeUTF8 is indexed by byte value and is true for 0x00-0x1f, '"', '\\' and every byte >= 0x80.
+var needEscapeNormalizeUTF8 = [256]bool{
+	'"':  true,
+	'\\': true,
+	0x00: true,
+	0x01: true,
+	0x02: true,
+	0x03: true,
+	0x04: true,
+	0x05: true,
+	0x06: true,
+	0x07: true,
+	0x08: true,
+	0x09: true,
+	0x0a: true,
+	0x0b: true,
+	0x0c: true,
+	0x0d: true,
+	0x0e: true,
+	0x0f: true,
+	0x10: true,
+	0x11: true,
+	0x12: true,
+	0x13: true,
+	0x14: true,
+	0x15: true,
+	0x16: true,
+	0x17: true,
+	0x18: true,
+	0x19: true,
+	0x1a: true,
+	0x1b: true,
+	0x1c: true,
+	0x1d: true,
+	0x1e: true,
+	0x1f: true,
+	/* 0x20 - 0x7f */
+	0x80: true,
+	0x81: true,
+	0x82: true,
+	0x83: true,
+	0x84: true,
+	0x85: true,
+	0x86: true,
+	0x87: true,
+	0x88: true,
+	0x89: true,
+	0x8a: true,
+	0x8b: true,
+	0x8c: true,
+	0x8d: true,
+	0x8e: true,
+	0x8f: true,
+	0x90: true,
+	0x91: true,
+	0x92: true,
+	0x93: true,
+	0x94: true,
+	0x95: true,
+	0x96: true,
+	0x97: true,
+	0x98: true,
+	0x99: true,
+	0x9a: true,
+	0x9b: true,
+	0x9c: true,
+	0x9d: true,
+	0x9e: true,
+	0x9f: true,
+	0xa0: true,
+	0xa1: true,
+	0xa2: true,
+	0xa3: true,
+	0xa4: true,
+	0xa5: true,
+	0xa6: true,
+	0xa7: true,
+	0xa8: true,
+	0xa9: true,
+	0xaa: true,
+	0xab: true,
+	0xac: true,
+	0xad: true,
+	0xae: true,
+	0xaf: true,
+	0xb0: true,
+	0xb1: true,
+	0xb2: true,
+	0xb3: true,
+	0xb4: true,
+	0xb5: true,
+	0xb6: true,
+	0xb7: true,
+	0xb8: true,
+	0xb9: true,
+	0xba: true,
+	0xbb: true,
+	0xbc: true,
+	0xbd: true,
+	0xbe: true,
+	0xbf: true,
+	0xc0: true,
+	0xc1: true,
+	0xc2: true,
+	0xc3: true,
+	0xc4: true,
+	0xc5: true,
+	0xc6: true,
+	0xc7: true,
+	0xc8: true,
+	0xc9: true,
+	0xca: true,
+	0xcb: true,
+	0xcc: true,
+	0xcd: true,
+	0xce: true,
+	0xcf: true,
+	0xd0: true,
+	0xd1: true,
+	0xd2: true,
+	0xd3: true,
+	0xd4: true,
+	0xd5: true,
+	0xd6: true,
+	0xd7: true,
+	0xd8: true,
+	0xd9: true,
+	0xda: true,
+	0xdb: true,
+	0xdc: true,
+	0xdd: true,
+	0xde: true,
+	0xdf: true,
+	0xe0: true,
+	0xe1: true,
+	0xe2: true,
+	0xe3: true,
+	0xe4: true,
+	0xe5: true,
+	0xe6: true,
+	0xe7: true,
+	0xe8: true,
+	0xe9: true,
+	0xea: true,
+	0xeb: true,
+	0xec: true,
+	0xed: true,
+	0xee: true,
+	0xef: true,
+	0xf0: true,
+	0xf1: true,
+	0xf2: true,
+	0xf3: true,
+	0xf4: true,
+	0xf5: true,
+	0xf6: true,
+	0xf7: true,
+	0xf8: true,
+	0xf9: true,
+	0xfa: true,
+	0xfb: true,
+	0xfc: true,
+	0xfd: true,
+	0xfe: true,
+	0xff: true,
+}
+// needEscapeHTML is indexed by byte value and is true for 0x00-0x1f, '"', '\\', '&', '<' and '>'; every other byte, including those >= 0x80, is false.
+var needEscapeHTML = [256]bool{
+	'"':  true,
+	'&':  true,
+	'<':  true,
+	'>':  true,
+	'\\': true,
+	0x00: true,
+	0x01: true,
+	0x02: true,
+	0x03: true,
+	0x04: true,
+	0x05: true,
+	0x06: true,
+	0x07: true,
+	0x08: true,
+	0x09: true,
+	0x0a: true,
+	0x0b: true,
+	0x0c: true,
+	0x0d: true,
+	0x0e: true,
+	0x0f: true,
+	0x10: true,
+	0x11: true,
+	0x12: true,
+	0x13: true,
+	0x14: true,
+	0x15: true,
+	0x16: true,
+	0x17: true,
+	0x18: true,
+	0x19: true,
+	0x1a: true,
+	0x1b: true,
+	0x1c: true,
+	0x1d: true,
+	0x1e: true,
+	0x1f: true,
+	/* 0x20 - 0xff */
+}
+// needEscape is indexed by byte value and is true for 0x00-0x1f, '"' and '\\'; every other byte, including those >= 0x80, is false.
+var needEscape = [256]bool{
+	'"':  true,
+	'\\': true,
+	0x00: true,
+	0x01: true,
+	0x02: true,
+	0x03: true,
+	0x04: true,
+	0x05: true,
+	0x06: true,
+	0x07: true,
+	0x08: true,
+	0x09: true,
+	0x0a: true,
+	0x0b: true,
+	0x0c: true,
+	0x0d: true,
+	0x0e: true,
+	0x0f: true,
+	0x10: true,
+	0x11: true,
+	0x12: true,
+	0x13: true,
+	0x14: true,
+	0x15: true,
+	0x16: true,
+	0x17: true,
+	0x18: true,
+	0x19: true,
+	0x1a: true,
+	0x1b: true,
+	0x1c: true,
+	0x1d: true,
+	0x1e: true,
+	0x1f: true,
+	/* 0x20 - 0xff */
+}
diff --git a/vendor/github.com/goccy/go-json/internal/encoder/vm/debug_vm.go b/vendor/github.com/goccy/go-json/internal/encoder/vm/debug_vm.go
new file mode 100644
index 00000000..82b6dd47
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/internal/encoder/vm/debug_vm.go
@@ -0,0 +1,41 @@
+package vm
+
+import (
+	"fmt"
+	"io"
+
+	"github.com/goccy/go-json/internal/encoder"
+)
+// DebugRun calls Run; on the way out it writes the opcode graph to ctx.Option.DebugDOTOut (when set) and, if Run panicked, dumps type, opcodes and context to ctx.Option.DebugOut before re-panicking.
+func DebugRun(ctx *encoder.RuntimeContext, b []byte, codeSet *encoder.OpcodeSet) ([]byte, error) {
+	defer func() {
+		code := codeSet.NoescapeKeyCode
+		if (ctx.Option.Flag & encoder.HTMLEscapeOption) != 0 {
+			code = codeSet.EscapeKeyCode
+		}
+		if dot := ctx.Option.DebugDOTOut; dot != nil {
+			_, _ = io.WriteString(dot, code.DumpDOT())
+			dot.Close()
+			ctx.Option.DebugDOTOut = nil
+		}
+
+		cause := recover()
+		if cause == nil {
+			return
+		}
+		out := ctx.Option.DebugOut
+		fmt.Fprintln(out, "=============[DEBUG]===============")
+		fmt.Fprintln(out, "* [TYPE]")
+		fmt.Fprintln(out, codeSet.Type)
+		fmt.Fprintf(out, "\n")
+		fmt.Fprintln(out, "* [ALL OPCODE]")
+		fmt.Fprintln(out, code.Dump())
+		fmt.Fprintf(out, "\n")
+		fmt.Fprintln(out, "* [CONTEXT]")
+		fmt.Fprintf(out, "%+v\n", ctx)
+		fmt.Fprintln(out, "===================================")
+		panic(cause)
+	}()
+
+	return Run(ctx, b, codeSet)
+}
diff --git a/vendor/github.com/goccy/go-json/internal/encoder/vm/hack.go b/vendor/github.com/goccy/go-json/internal/encoder/vm/hack.go
new file mode 100644
index 00000000..65252b4a
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/internal/encoder/vm/hack.go
@@ -0,0 +1,9 @@
+package vm
+
+import (
+	// HACK: compile order
+	// The `vm`, `vm_indent`, `vm_color` and `vm_color_indent` packages use a lot of memory to compile,
+	// so force a dependency chain between them to keep them from being compiled concurrently.
+	// dependency order: vm => vm_indent => vm_color => vm_color_indent
+	_ "github.com/goccy/go-json/internal/encoder/vm_indent"
+)
diff --git a/vendor/github.com/goccy/go-json/internal/encoder/vm/util.go b/vendor/github.com/goccy/go-json/internal/encoder/vm/util.go
new file mode 100644
index 00000000..86291d7b
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/internal/encoder/vm/util.go
@@ -0,0 +1,207 @@
+package vm
+
+import (
+	"encoding/json"
+	"fmt"
+	"unsafe"
+
+	"github.com/goccy/go-json/internal/encoder"
+	"github.com/goccy/go-json/internal/runtime"
+)
+// uintptrSize is the size of a uintptr in bytes: 4 when uintptr is 32 bits wide, 8 when it is 64 bits wide.
+const uintptrSize = 4 << (^uintptr(0) >> 63)
+// Package-local aliases for helpers and error values exported by the encoder package.
+var (
+	appendInt           = encoder.AppendInt
+	appendUint          = encoder.AppendUint
+	appendFloat32       = encoder.AppendFloat32
+	appendFloat64       = encoder.AppendFloat64
+	appendString        = encoder.AppendString
+	appendByteSlice     = encoder.AppendByteSlice
+	appendNumber        = encoder.AppendNumber
+	errUnsupportedValue = encoder.ErrUnsupportedValue
+	errUnsupportedFloat = encoder.ErrUnsupportedFloat
+	mapiterinit         = encoder.MapIterInit
+	mapiterkey          = encoder.MapIterKey
+	mapitervalue        = encoder.MapIterValue
+	mapiternext         = encoder.MapIterNext
+	maplen              = encoder.MapLen
+)
+// emptyInterface is a type pointer followed by a data pointer; ptrToInterface reinterprets a value of this struct as an interface{}.
+type emptyInterface struct {
+	typ *runtime.Type
+	ptr unsafe.Pointer
+}
+// nonEmptyInterface is an itab pointer followed by a data pointer; presumably it mirrors the runtime layout of a non-empty interface value — TODO confirm.
+type nonEmptyInterface struct {
+	itab *struct {
+		ityp *runtime.Type // static interface type
+		typ  *runtime.Type // dynamic concrete type
+		// unused fields...
+	}
+	ptr unsafe.Pointer
+}
+// errUnimplementedOp returns the error reported when the VM meets an opcode it has no case for.
+func errUnimplementedOp(op encoder.OpType) error {
+	return fmt.Errorf("encoder: opcode %s has not been implemented", op)
+}
+// load returns the uintptr stored at address base+idx.
+func load(base uintptr, idx uint32) uintptr {
+	addr := base + uintptr(idx)
+	return **(**uintptr)(unsafe.Pointer(&addr)) // reads *(*uintptr)(addr) by going through &addr; presumably avoids a direct uintptr-to-unsafe.Pointer conversion — TODO confirm
+}
+// store writes p into the uintptr slot at address base+idx.
+func store(base uintptr, idx uint32, p uintptr) {
+	addr := base + uintptr(idx)
+	**(**uintptr)(unsafe.Pointer(&addr)) = p
+}
+// loadNPtr reads the uintptr at base+idx and then dereferences it ptrNum times, returning 0 as soon as a zero pointer is met.
+func loadNPtr(base uintptr, idx uint32, ptrNum uint8) uintptr {
+	addr := base + uintptr(idx)
+	p := **(**uintptr)(unsafe.Pointer(&addr))
+	for i := uint8(0); i < ptrNum; i++ {
+		if p == 0 {
+			return 0
+		}
+		p = ptrToPtr(p)
+	}
+	return p
+}
+// ptrToUint64 reads an unsigned integer of bitSize bits (8, 16, 32 or 64) at address p and widens it to uint64; any other bitSize yields 0.
+func ptrToUint64(p uintptr, bitSize uint8) uint64 {
+	switch bitSize {
+	case 8:
+		return (uint64)(**(**uint8)(unsafe.Pointer(&p)))
+	case 16:
+		return (uint64)(**(**uint16)(unsafe.Pointer(&p)))
+	case 32:
+		return (uint64)(**(**uint32)(unsafe.Pointer(&p)))
+	case 64:
+		return **(**uint64)(unsafe.Pointer(&p))
+	}
+	return 0
+}
+func ptrToFloat32(p uintptr) float32            { return **(**float32)(unsafe.Pointer(&p)) } // this and the following ptrToX helpers treat p as the address of an X; all but ptrToSlice also load the value
+func ptrToFloat64(p uintptr) float64            { return **(**float64)(unsafe.Pointer(&p)) }
+func ptrToBool(p uintptr) bool                  { return **(**bool)(unsafe.Pointer(&p)) }
+func ptrToBytes(p uintptr) []byte               { return **(**[]byte)(unsafe.Pointer(&p)) }
+func ptrToNumber(p uintptr) json.Number         { return **(**json.Number)(unsafe.Pointer(&p)) }
+func ptrToString(p uintptr) string              { return **(**string)(unsafe.Pointer(&p)) }
+func ptrToSlice(p uintptr) *runtime.SliceHeader { return *(**runtime.SliceHeader)(unsafe.Pointer(&p)) }
+func ptrToPtr(p uintptr) uintptr { // returns the pointer stored at address p
+	return uintptr(**(**unsafe.Pointer)(unsafe.Pointer(&p)))
+}
+func ptrToNPtr(p uintptr, ptrNum uint8) uintptr { // dereferences p ptrNum times; returns 0 as soon as a zero pointer is met
+	for i := uint8(0); i < ptrNum; i++ {
+		if p == 0 {
+			return 0
+		}
+		p = ptrToPtr(p)
+	}
+	return p
+}
+// ptrToUnsafePtr reinterprets the bits of p as an unsafe.Pointer.
+func ptrToUnsafePtr(p uintptr) unsafe.Pointer {
+	return *(*unsafe.Pointer)(unsafe.Pointer(&p))
+}
+func ptrToInterface(code *encoder.Opcode, p uintptr) interface{} { // builds an interface{} from type word code.Type and data word p
+	return *(*interface{})(unsafe.Pointer(&emptyInterface{
+		typ: code.Type,
+		ptr: *(*unsafe.Pointer)(unsafe.Pointer(&p)),
+	}))
+}
+// appendBool appends the JSON literal for v, "true" or "false", to b.
+func appendBool(_ *encoder.RuntimeContext, b []byte, v bool) []byte {
+	if !v {
+		return append(b, "false"...)
+	}
+	return append(b, "true"...)
+}
+// appendNull appends the JSON literal null to b.
+func appendNull(_ *encoder.RuntimeContext, b []byte) []byte {
+	return append(b, "null"...)
+}
+// appendComma appends a ',' separator to b.
+func appendComma(_ *encoder.RuntimeContext, b []byte) []byte {
+	return append(b, ',')
+}
+// appendNullComma appends the JSON literal null followed by a ',' separator to b.
+func appendNullComma(_ *encoder.RuntimeContext, b []byte) []byte {
+	return append(b, "null,"...)
+}
+// appendColon overwrites the last byte of b with ':' and returns b.
+// It indexes len(b)-1 unconditionally, so b must not be empty.
+func appendColon(_ *encoder.RuntimeContext, b []byte) []byte {
+	b[len(b)-1] = ':'
+	return b
+}
+// appendMapKeyValue appends key to b, overwrites the last byte of the result with ':', then appends value.
+func appendMapKeyValue(_ *encoder.RuntimeContext, _ *encoder.Opcode, b, key, value []byte) []byte {
+	b = append(b, key...)
+	b[len(b)-1] = ':'
+	return append(b, value...)
+}
+// appendMapEnd overwrites the last byte of b with '}' and appends a ','.
+// It indexes len(b)-1 unconditionally, so b must not be empty.
+func appendMapEnd(_ *encoder.RuntimeContext, _ *encoder.Opcode, b []byte) []byte {
+	b[len(b)-1] = '}'
+	return append(b, ',')
+}
+// appendMarshalJSON forwards to encoder.AppendMarshalJSON with the same arguments.
+func appendMarshalJSON(ctx *encoder.RuntimeContext, code *encoder.Opcode, b []byte, v interface{}) ([]byte, error) {
+	return encoder.AppendMarshalJSON(ctx, code, b, v)
+}
+// appendMarshalText forwards to encoder.AppendMarshalText with the same arguments.
+func appendMarshalText(ctx *encoder.RuntimeContext, code *encoder.Opcode, b []byte, v interface{}) ([]byte, error) {
+	return encoder.AppendMarshalText(ctx, code, b, v)
+}
+// appendArrayHead appends the opening '[' of an array to b.
+func appendArrayHead(_ *encoder.RuntimeContext, _ *encoder.Opcode, b []byte) []byte {
+	return append(b, '[')
+}
+// appendArrayEnd overwrites the last byte of b with ']' and appends a ','.
+// It indexes len(b)-1 unconditionally, so b must not be empty.
+func appendArrayEnd(_ *encoder.RuntimeContext, _ *encoder.Opcode, b []byte) []byte {
+	b[len(b)-1] = ']'
+	return append(b, ',')
+}
+// appendEmptyArray appends an empty array followed by a separator, "[],", to b.
+func appendEmptyArray(_ *encoder.RuntimeContext, b []byte) []byte {
+	return append(b, '[', ']', ',')
+}
+// appendEmptyObject appends an empty object followed by a separator, "{},", to b.
+func appendEmptyObject(_ *encoder.RuntimeContext, b []byte) []byte {
+	return append(b, '{', '}', ',')
+}
+// appendObjectEnd overwrites the last byte of b with '}' and appends a ','.
+// It indexes len(b)-1 unconditionally, so b must not be empty.
+func appendObjectEnd(_ *encoder.RuntimeContext, _ *encoder.Opcode, b []byte) []byte {
+	b[len(b)-1] = '}'
+	return append(b, ',')
+}
+// appendStructHead appends the opening '{' of an object to b.
+func appendStructHead(_ *encoder.RuntimeContext, b []byte) []byte {
+	return append(b, '{')
+}
+// appendStructKey appends code.Key to b as it is stored on the opcode.
+func appendStructKey(_ *encoder.RuntimeContext, code *encoder.Opcode, b []byte) []byte {
+	return append(b, code.Key...)
+}
+// appendStructEnd appends the closing '}' of an object followed by a ',' to b.
+func appendStructEnd(_ *encoder.RuntimeContext, _ *encoder.Opcode, b []byte) []byte {
+	return append(b, '}', ',')
+}
+// appendStructEndSkipLast closes an object: a trailing ',' in b is turned into '}' and a new ',' appended; otherwise it defers to appendStructEnd.
+func appendStructEndSkipLast(ctx *encoder.RuntimeContext, code *encoder.Opcode, b []byte) []byte {
+	tail := len(b) - 1
+	if b[tail] != ',' {
+		return appendStructEnd(ctx, code, b)
+	}
+	b[tail] = '}'
+	return appendComma(ctx, b)
+}
+// restoreIndent, storeIndent, appendMapKeyIndent and appendArrayElemIndent are no-ops in this package: nothing is stored and b is returned unchanged.
+func restoreIndent(_ *encoder.RuntimeContext, _ *encoder.Opcode, _ uintptr)               {}
+func storeIndent(_ uintptr, _ *encoder.Opcode, _ uintptr)                                 {}
+func appendMapKeyIndent(_ *encoder.RuntimeContext, _ *encoder.Opcode, b []byte) []byte    { return b }
+func appendArrayElemIndent(_ *encoder.RuntimeContext, _ *encoder.Opcode, b []byte) []byte { return b }
diff --git a/vendor/github.com/goccy/go-json/internal/encoder/vm/vm.go b/vendor/github.com/goccy/go-json/internal/encoder/vm/vm.go
new file mode 100644
index 00000000..645d20f9
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/internal/encoder/vm/vm.go
@@ -0,0 +1,4859 @@
+// Code generated by internal/cmd/generator. DO NOT EDIT!
+package vm
+
+import (
+	"math"
+	"reflect"
+	"sort"
+	"unsafe"
+
+	"github.com/goccy/go-json/internal/encoder"
+	"github.com/goccy/go-json/internal/runtime"
+)
+
+func Run(ctx *encoder.RuntimeContext, b []byte, codeSet *encoder.OpcodeSet) ([]byte, error) {
+	recursiveLevel := 0
+	ptrOffset := uintptr(0)
+	ctxptr := ctx.Ptr()
+	var code *encoder.Opcode
+	if (ctx.Option.Flag & encoder.HTMLEscapeOption) != 0 {
+		code = codeSet.EscapeKeyCode
+	} else {
+		code = codeSet.NoescapeKeyCode
+	}
+
+	for {
+		switch code.Op {
+		default:
+			return nil, errUnimplementedOp(code.Op)
+		case encoder.OpPtr:
+			p := load(ctxptr, code.Idx)
+			code = code.Next
+			store(ctxptr, code.Idx, ptrToPtr(p))
+		case encoder.OpIntPtr:
+			p := loadNPtr(ctxptr, code.Idx, code.PtrNum)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.Next
+				break
+			}
+			store(ctxptr, code.Idx, p)
+			fallthrough
+		case encoder.OpInt:
+			b = appendInt(ctx, b, load(ctxptr, code.Idx), code)
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpUintPtr:
+			p := loadNPtr(ctxptr, code.Idx, code.PtrNum)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.Next
+				break
+			}
+			store(ctxptr, code.Idx, p)
+			fallthrough
+		case encoder.OpUint:
+			b = appendUint(ctx, b, load(ctxptr, code.Idx), code)
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpIntString:
+			b = append(b, '"')
+			b = appendInt(ctx, b, load(ctxptr, code.Idx), code)
+			b = append(b, '"')
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpUintString:
+			b = append(b, '"')
+			b = appendUint(ctx, b, load(ctxptr, code.Idx), code)
+			b = append(b, '"')
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpFloat32Ptr:
+			p := loadNPtr(ctxptr, code.Idx, code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+				b = appendComma(ctx, b)
+				code = code.Next
+				break
+			}
+			store(ctxptr, code.Idx, p)
+			fallthrough
+		case encoder.OpFloat32:
+			b = appendFloat32(ctx, b, ptrToFloat32(load(ctxptr, code.Idx)))
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpFloat64Ptr:
+			p := loadNPtr(ctxptr, code.Idx, code.PtrNum)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.Next
+				break
+			}
+			store(ctxptr, code.Idx, p)
+			fallthrough
+		case encoder.OpFloat64:
+			v := ptrToFloat64(load(ctxptr, code.Idx))
+			if math.IsInf(v, 0) || math.IsNaN(v) {
+				return nil, errUnsupportedFloat(v)
+			}
+			b = appendFloat64(ctx, b, v)
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStringPtr:
+			p := loadNPtr(ctxptr, code.Idx, code.PtrNum)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.Next
+				break
+			}
+			store(ctxptr, code.Idx, p)
+			fallthrough
+		case encoder.OpString:
+			b = appendString(ctx, b, ptrToString(load(ctxptr, code.Idx)))
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpBoolPtr:
+			p := loadNPtr(ctxptr, code.Idx, code.PtrNum)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.Next
+				break
+			}
+			store(ctxptr, code.Idx, p)
+			fallthrough
+		case encoder.OpBool:
+			b = appendBool(ctx, b, ptrToBool(load(ctxptr, code.Idx)))
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpBytesPtr:
+			p := loadNPtr(ctxptr, code.Idx, code.PtrNum)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.Next
+				break
+			}
+			store(ctxptr, code.Idx, p)
+			fallthrough
+		case encoder.OpBytes:
+			b = appendByteSlice(ctx, b, ptrToBytes(load(ctxptr, code.Idx)))
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpNumberPtr:
+			p := loadNPtr(ctxptr, code.Idx, code.PtrNum)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.Next
+				break
+			}
+			store(ctxptr, code.Idx, p)
+			fallthrough
+		case encoder.OpNumber:
+			bb, err := appendNumber(ctx, b, ptrToNumber(load(ctxptr, code.Idx)))
+			if err != nil {
+				return nil, err
+			}
+			b = appendComma(ctx, bb)
+			code = code.Next
+		case encoder.OpInterfacePtr:
+			p := loadNPtr(ctxptr, code.Idx, code.PtrNum)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.Next
+				break
+			}
+			store(ctxptr, code.Idx, p)
+			fallthrough
+		case encoder.OpInterface:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.Next
+				break
+			}
+			if recursiveLevel > encoder.StartDetectingCyclesAfter {
+				for _, seen := range ctx.SeenPtr {
+					if p == seen {
+						return nil, errUnsupportedValue(code, p)
+					}
+				}
+			}
+			ctx.SeenPtr = append(ctx.SeenPtr, p)
+			var (
+				typ      *runtime.Type
+				ifacePtr unsafe.Pointer
+			)
+			up := ptrToUnsafePtr(p)
+			if code.Flags&encoder.NonEmptyInterfaceFlags != 0 {
+				iface := (*nonEmptyInterface)(up)
+				ifacePtr = iface.ptr
+				if iface.itab != nil {
+					typ = iface.itab.typ
+				}
+			} else {
+				iface := (*emptyInterface)(up)
+				ifacePtr = iface.ptr
+				typ = iface.typ
+			}
+			if ifacePtr == nil {
+				isDirectedNil := typ != nil && typ.Kind() == reflect.Struct && !runtime.IfaceIndir(typ)
+				if !isDirectedNil {
+					b = appendNullComma(ctx, b)
+					code = code.Next
+					break
+				}
+			}
+			ctx.KeepRefs = append(ctx.KeepRefs, up)
+			ifaceCodeSet, err := encoder.CompileToGetCodeSet(ctx, uintptr(unsafe.Pointer(typ)))
+			if err != nil {
+				return nil, err
+			}
+
+			totalLength := uintptr(code.Length) + 3
+			nextTotalLength := uintptr(ifaceCodeSet.CodeLength) + 3
+
+			var c *encoder.Opcode
+			if (ctx.Option.Flag & encoder.HTMLEscapeOption) != 0 {
+				c = ifaceCodeSet.InterfaceEscapeKeyCode
+			} else {
+				c = ifaceCodeSet.InterfaceNoescapeKeyCode
+			}
+			curlen := uintptr(len(ctx.Ptrs))
+			offsetNum := ptrOffset / uintptrSize
+			oldOffset := ptrOffset
+			ptrOffset += totalLength * uintptrSize
+			oldBaseIndent := ctx.BaseIndent
+			ctx.BaseIndent += code.Indent
+
+			newLen := offsetNum + totalLength + nextTotalLength
+			if curlen < newLen {
+				ctx.Ptrs = append(ctx.Ptrs, make([]uintptr, newLen-curlen)...)
+			}
+			ctxptr = ctx.Ptr() + ptrOffset // assign new ctxptr
+
+			end := ifaceCodeSet.EndCode
+			store(ctxptr, c.Idx, uintptr(ifacePtr))
+			store(ctxptr, end.Idx, oldOffset)
+			store(ctxptr, end.ElemIdx, uintptr(unsafe.Pointer(code.Next)))
+			storeIndent(ctxptr, end, uintptr(oldBaseIndent))
+			code = c
+			recursiveLevel++
+		case encoder.OpInterfaceEnd:
+			recursiveLevel--
+
+			// restore ctxptr
+			offset := load(ctxptr, code.Idx)
+			restoreIndent(ctx, code, ctxptr)
+			ctx.SeenPtr = ctx.SeenPtr[:len(ctx.SeenPtr)-1]
+
+			codePtr := load(ctxptr, code.ElemIdx)
+			code = (*encoder.Opcode)(ptrToUnsafePtr(codePtr))
+			ctxptr = ctx.Ptr() + offset
+			ptrOffset = offset
+		case encoder.OpMarshalJSONPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToPtr(p))
+			fallthrough
+		case encoder.OpMarshalJSON:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.Next
+				break
+			}
+			if (code.Flags&encoder.IsNilableTypeFlags) != 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				p = ptrToPtr(p)
+			}
+			bb, err := appendMarshalJSON(ctx, code, b, ptrToInterface(code, p))
+			if err != nil {
+				return nil, err
+			}
+			b = appendComma(ctx, bb)
+			code = code.Next
+		case encoder.OpMarshalTextPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToPtr(p))
+			fallthrough
+		case encoder.OpMarshalText:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				b = append(b, `""`...)
+				b = appendComma(ctx, b)
+				code = code.Next
+				break
+			}
+			if (code.Flags&encoder.IsNilableTypeFlags) != 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				p = ptrToPtr(p)
+			}
+			bb, err := appendMarshalText(ctx, code, b, ptrToInterface(code, p))
+			if err != nil {
+				return nil, err
+			}
+			b = appendComma(ctx, bb)
+			code = code.Next
+		case encoder.OpSlicePtr:
+			p := loadNPtr(ctxptr, code.Idx, code.PtrNum)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, p)
+			fallthrough
+		case encoder.OpSlice:
+			p := load(ctxptr, code.Idx)
+			slice := ptrToSlice(p)
+			if p == 0 || slice.Data == nil {
+				b = appendNullComma(ctx, b)
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.ElemIdx, 0)
+			store(ctxptr, code.Length, uintptr(slice.Len))
+			store(ctxptr, code.Idx, uintptr(slice.Data))
+			if slice.Len > 0 {
+				b = appendArrayHead(ctx, code, b)
+				code = code.Next
+				store(ctxptr, code.Idx, uintptr(slice.Data))
+			} else {
+				b = appendEmptyArray(ctx, b)
+				code = code.End.Next
+			}
+		case encoder.OpSliceElem:
+			idx := load(ctxptr, code.ElemIdx)
+			length := load(ctxptr, code.Length)
+			idx++
+			if idx < length {
+				b = appendArrayElemIndent(ctx, code, b)
+				store(ctxptr, code.ElemIdx, idx)
+				data := load(ctxptr, code.Idx)
+				size := uintptr(code.Size)
+				code = code.Next
+				store(ctxptr, code.Idx, data+idx*size)
+			} else {
+				b = appendArrayEnd(ctx, code, b)
+				code = code.End.Next
+			}
+		case encoder.OpArrayPtr:
+			p := loadNPtr(ctxptr, code.Idx, code.PtrNum)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, p)
+			fallthrough
+		case encoder.OpArray:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.End.Next
+				break
+			}
+			if code.Length > 0 {
+				b = appendArrayHead(ctx, code, b)
+				store(ctxptr, code.ElemIdx, 0)
+				code = code.Next
+				store(ctxptr, code.Idx, p)
+			} else {
+				b = appendEmptyArray(ctx, b)
+				code = code.End.Next
+			}
+		case encoder.OpArrayElem:
+			idx := load(ctxptr, code.ElemIdx)
+			idx++
+			if idx < uintptr(code.Length) {
+				b = appendArrayElemIndent(ctx, code, b)
+				store(ctxptr, code.ElemIdx, idx)
+				p := load(ctxptr, code.Idx)
+				size := uintptr(code.Size)
+				code = code.Next
+				store(ctxptr, code.Idx, p+idx*size)
+			} else {
+				b = appendArrayEnd(ctx, code, b)
+				code = code.End.Next
+			}
+		case encoder.OpMapPtr:
+			p := loadNPtr(ctxptr, code.Idx, code.PtrNum)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, p)
+			fallthrough
+		case encoder.OpMap:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.End.Next
+				break
+			}
+			uptr := ptrToUnsafePtr(p)
+			mlen := maplen(uptr)
+			if mlen <= 0 {
+				b = appendEmptyObject(ctx, b)
+				code = code.End.Next
+				break
+			}
+			b = appendStructHead(ctx, b)
+			unorderedMap := (ctx.Option.Flag & encoder.UnorderedMapOption) != 0
+			mapCtx := encoder.NewMapContext(mlen, unorderedMap)
+			mapiterinit(code.Type, uptr, &mapCtx.Iter)
+			store(ctxptr, code.Idx, uintptr(unsafe.Pointer(mapCtx)))
+			ctx.KeepRefs = append(ctx.KeepRefs, unsafe.Pointer(mapCtx))
+			if unorderedMap {
+				b = appendMapKeyIndent(ctx, code.Next, b)
+			} else {
+				mapCtx.Start = len(b)
+				mapCtx.First = len(b)
+			}
+			key := mapiterkey(&mapCtx.Iter)
+			store(ctxptr, code.Next.Idx, uintptr(key))
+			code = code.Next
+		case encoder.OpMapKey:
+			mapCtx := (*encoder.MapContext)(ptrToUnsafePtr(load(ctxptr, code.Idx)))
+			idx := mapCtx.Idx
+			idx++
+			if (ctx.Option.Flag & encoder.UnorderedMapOption) != 0 {
+				if idx < mapCtx.Len {
+					b = appendMapKeyIndent(ctx, code, b)
+					mapCtx.Idx = int(idx)
+					key := mapiterkey(&mapCtx.Iter)
+					store(ctxptr, code.Next.Idx, uintptr(key))
+					code = code.Next
+				} else {
+					b = appendObjectEnd(ctx, code, b)
+					encoder.ReleaseMapContext(mapCtx)
+					code = code.End.Next
+				}
+			} else {
+				mapCtx.Slice.Items[mapCtx.Idx].Value = b[mapCtx.Start:len(b)]
+				if idx < mapCtx.Len {
+					mapCtx.Idx = int(idx)
+					mapCtx.Start = len(b)
+					key := mapiterkey(&mapCtx.Iter)
+					store(ctxptr, code.Next.Idx, uintptr(key))
+					code = code.Next
+				} else {
+					code = code.End
+				}
+			}
+		case encoder.OpMapValue:
+			mapCtx := (*encoder.MapContext)(ptrToUnsafePtr(load(ctxptr, code.Idx)))
+			if (ctx.Option.Flag & encoder.UnorderedMapOption) != 0 {
+				b = appendColon(ctx, b)
+			} else {
+				mapCtx.Slice.Items[mapCtx.Idx].Key = b[mapCtx.Start:len(b)]
+				mapCtx.Start = len(b)
+			}
+			value := mapitervalue(&mapCtx.Iter)
+			store(ctxptr, code.Next.Idx, uintptr(value))
+			mapiternext(&mapCtx.Iter)
+			code = code.Next
+		case encoder.OpMapEnd:
+			// this operation only used by sorted map.
+			mapCtx := (*encoder.MapContext)(ptrToUnsafePtr(load(ctxptr, code.Idx)))
+			sort.Sort(mapCtx.Slice)
+			buf := mapCtx.Buf
+			for _, item := range mapCtx.Slice.Items {
+				buf = appendMapKeyValue(ctx, code, buf, item.Key, item.Value)
+			}
+			buf = appendMapEnd(ctx, code, buf)
+			b = b[:mapCtx.First]
+			b = append(b, buf...)
+			mapCtx.Buf = buf
+			encoder.ReleaseMapContext(mapCtx)
+			code = code.Next
+		case encoder.OpRecursivePtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				code = code.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpRecursive:
+			ptr := load(ctxptr, code.Idx)
+			if ptr != 0 {
+				if recursiveLevel > encoder.StartDetectingCyclesAfter {
+					for _, seen := range ctx.SeenPtr {
+						if ptr == seen {
+							return nil, errUnsupportedValue(code, ptr)
+						}
+					}
+				}
+			}
+			ctx.SeenPtr = append(ctx.SeenPtr, ptr)
+			c := code.Jmp.Code
+			curlen := uintptr(len(ctx.Ptrs))
+			offsetNum := ptrOffset / uintptrSize
+			oldOffset := ptrOffset
+			ptrOffset += code.Jmp.CurLen * uintptrSize
+			oldBaseIndent := ctx.BaseIndent
+			indentDiffFromTop := c.Indent - 1
+			ctx.BaseIndent += code.Indent - indentDiffFromTop
+
+			newLen := offsetNum + code.Jmp.CurLen + code.Jmp.NextLen
+			if curlen < newLen {
+				ctx.Ptrs = append(ctx.Ptrs, make([]uintptr, newLen-curlen)...)
+			}
+			ctxptr = ctx.Ptr() + ptrOffset // assign new ctxptr
+
+			store(ctxptr, c.Idx, ptr)
+			store(ctxptr, c.End.Next.Idx, oldOffset)
+			store(ctxptr, c.End.Next.ElemIdx, uintptr(unsafe.Pointer(code.Next)))
+			storeIndent(ctxptr, c.End.Next, uintptr(oldBaseIndent))
+			code = c
+			recursiveLevel++
+		case encoder.OpRecursiveEnd:
+			recursiveLevel--
+
+			// restore ctxptr
+			restoreIndent(ctx, code, ctxptr)
+			offset := load(ctxptr, code.Idx)
+			ctx.SeenPtr = ctx.SeenPtr[:len(ctx.SeenPtr)-1]
+
+			codePtr := load(ctxptr, code.ElemIdx)
+			code = (*encoder.Opcode)(ptrToUnsafePtr(codePtr))
+			ctxptr = ctx.Ptr() + offset
+			ptrOffset = offset
+		case encoder.OpStructPtrHead:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHead:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && ((code.Flags&encoder.IndirectFlags) != 0 || code.Next.Op == encoder.OpStructEnd) {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if len(code.Key) > 0 {
+				if (code.Flags&encoder.IsTaggedKeyFlags) != 0 || code.Flags&encoder.AnonymousKeyFlags == 0 {
+					b = appendStructKey(ctx, code, b)
+				}
+			}
+			p += uintptr(code.Offset)
+			code = code.Next
+			store(ctxptr, code.Idx, p)
+		case encoder.OpStructPtrHeadOmitEmpty:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmpty:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && ((code.Flags&encoder.IndirectFlags) != 0 || code.Next.Op == encoder.OpStructEnd) {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			p += uintptr(code.Offset)
+			if p == 0 || (ptrToPtr(p) == 0 && (code.Flags&encoder.IsNextOpPtrTypeFlags) != 0) {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				code = code.Next
+				store(ctxptr, code.Idx, p)
+			}
+		case encoder.OpStructPtrHeadInt:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadInt:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = appendInt(ctx, b, p+uintptr(code.Offset), code)
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyInt:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyInt:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			u64 := ptrToUint64(p+uintptr(code.Offset), code.NumBitSize)
+			v := u64 & ((1 << code.NumBitSize) - 1)
+			if v == 0 {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				b = appendInt(ctx, b, p+uintptr(code.Offset), code)
+				b = appendComma(ctx, b)
+				code = code.Next
+			}
+		case encoder.OpStructPtrHeadIntString:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadIntString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			b = appendInt(ctx, b, p+uintptr(code.Offset), code)
+			b = append(b, '"')
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyIntString:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyIntString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			p += uintptr(code.Offset)
+			u64 := ptrToUint64(p, code.NumBitSize)
+			v := u64 & ((1 << code.NumBitSize) - 1)
+			if v == 0 {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendInt(ctx, b, p, code)
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+				code = code.Next
+			}
+		case encoder.OpStructPtrHeadIntPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadIntPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendInt(ctx, b, p, code)
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyIntPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyIntPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendInt(ctx, b, p, code)
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructPtrHeadIntPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadIntPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = append(b, '"')
+				b = appendInt(ctx, b, p, code)
+				b = append(b, '"')
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyIntPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyIntPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendInt(ctx, b, p, code)
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructPtrHeadUint:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadUint:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = appendUint(ctx, b, p+uintptr(code.Offset), code)
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyUint:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyUint:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			u64 := ptrToUint64(p+uintptr(code.Offset), code.NumBitSize)
+			v := u64 & ((1 << code.NumBitSize) - 1)
+			if v == 0 {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				b = appendUint(ctx, b, p+uintptr(code.Offset), code)
+				b = appendComma(ctx, b)
+				code = code.Next
+			}
+		case encoder.OpStructPtrHeadUintString:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadUintString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			b = appendUint(ctx, b, p+uintptr(code.Offset), code)
+			b = append(b, '"')
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyUintString:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyUintString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			u64 := ptrToUint64(p+uintptr(code.Offset), code.NumBitSize)
+			v := u64 & ((1 << code.NumBitSize) - 1)
+			if v == 0 {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendUint(ctx, b, p+uintptr(code.Offset), code)
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+				code = code.Next
+			}
+		case encoder.OpStructPtrHeadUintPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadUintPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendUint(ctx, b, p, code)
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyUintPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyUintPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendUint(ctx, b, p, code)
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructPtrHeadUintPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadUintPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = append(b, '"')
+				b = appendUint(ctx, b, p, code)
+				b = append(b, '"')
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyUintPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyUintPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendUint(ctx, b, p, code)
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructPtrHeadFloat32:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadFloat32:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = appendFloat32(ctx, b, ptrToFloat32(p+uintptr(code.Offset)))
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyFloat32:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyFloat32:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			v := ptrToFloat32(p + uintptr(code.Offset))
+			if v == 0 {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				b = appendFloat32(ctx, b, v)
+				b = appendComma(ctx, b)
+				code = code.Next
+			}
+		case encoder.OpStructPtrHeadFloat32String:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadFloat32String:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			b = appendFloat32(ctx, b, ptrToFloat32(p+uintptr(code.Offset)))
+			b = append(b, '"')
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyFloat32String:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyFloat32String:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			v := ptrToFloat32(p + uintptr(code.Offset))
+			if v == 0 {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendFloat32(ctx, b, v)
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+				code = code.Next
+			}
+		case encoder.OpStructPtrHeadFloat32Ptr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadFloat32Ptr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendFloat32(ctx, b, ptrToFloat32(p))
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyFloat32Ptr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyFloat32Ptr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendFloat32(ctx, b, ptrToFloat32(p))
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructPtrHeadFloat32PtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadFloat32PtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = append(b, '"')
+				b = appendFloat32(ctx, b, ptrToFloat32(p))
+				b = append(b, '"')
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyFloat32PtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyFloat32PtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendFloat32(ctx, b, ptrToFloat32(p))
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructPtrHeadFloat64:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadFloat64:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			v := ptrToFloat64(p + uintptr(code.Offset))
+			if math.IsInf(v, 0) || math.IsNaN(v) {
+				return nil, errUnsupportedFloat(v)
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = appendFloat64(ctx, b, v)
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyFloat64:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyFloat64:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			v := ptrToFloat64(p + uintptr(code.Offset))
+			if v == 0 {
+				code = code.NextField
+			} else {
+				if math.IsInf(v, 0) || math.IsNaN(v) {
+					return nil, errUnsupportedFloat(v)
+				}
+				b = appendStructKey(ctx, code, b)
+				b = appendFloat64(ctx, b, v)
+				b = appendComma(ctx, b)
+				code = code.Next
+			}
+		case encoder.OpStructPtrHeadFloat64String:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadFloat64String:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			v := ptrToFloat64(p + uintptr(code.Offset))
+			if math.IsInf(v, 0) || math.IsNaN(v) {
+				return nil, errUnsupportedFloat(v)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			b = appendFloat64(ctx, b, v)
+			b = append(b, '"')
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyFloat64String:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyFloat64String:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			v := ptrToFloat64(p + uintptr(code.Offset))
+			if v == 0 {
+				code = code.NextField
+			} else {
+				if math.IsInf(v, 0) || math.IsNaN(v) {
+					return nil, errUnsupportedFloat(v)
+				}
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendFloat64(ctx, b, v)
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+				code = code.Next
+			}
+		case encoder.OpStructPtrHeadFloat64Ptr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadFloat64Ptr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				v := ptrToFloat64(p)
+				if math.IsInf(v, 0) || math.IsNaN(v) {
+					return nil, errUnsupportedFloat(v)
+				}
+				b = appendFloat64(ctx, b, v)
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyFloat64Ptr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyFloat64Ptr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				v := ptrToFloat64(p)
+				if math.IsInf(v, 0) || math.IsNaN(v) {
+					return nil, errUnsupportedFloat(v)
+				}
+				b = appendFloat64(ctx, b, v)
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructPtrHeadFloat64PtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadFloat64PtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = append(b, '"')
+				v := ptrToFloat64(p)
+				if math.IsInf(v, 0) || math.IsNaN(v) {
+					return nil, errUnsupportedFloat(v)
+				}
+				b = appendFloat64(ctx, b, v)
+				b = append(b, '"')
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyFloat64PtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyFloat64PtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				v := ptrToFloat64(p)
+				if math.IsInf(v, 0) || math.IsNaN(v) {
+					return nil, errUnsupportedFloat(v)
+				}
+				b = appendFloat64(ctx, b, v)
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructPtrHeadString:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNull(ctx, b)
+					b = appendComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = appendString(ctx, b, ptrToString(p+uintptr(code.Offset)))
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyString:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			v := ptrToString(p + uintptr(code.Offset))
+			if v == "" {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				b = appendString(ctx, b, v)
+				b = appendComma(ctx, b)
+				code = code.Next
+			}
+		case encoder.OpStructPtrHeadStringString:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadStringString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = appendString(ctx, b, string(appendString(ctx, []byte{}, ptrToString(p+uintptr(code.Offset)))))
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyStringString:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyStringString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			v := ptrToString(p + uintptr(code.Offset))
+			if v == "" {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				b = appendString(ctx, b, string(appendString(ctx, []byte{}, v)))
+				b = appendComma(ctx, b)
+				code = code.Next
+			}
+		case encoder.OpStructPtrHeadStringPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadStringPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendString(ctx, b, ptrToString(p))
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyStringPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyStringPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendString(ctx, b, ptrToString(p))
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructPtrHeadStringPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadStringPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendString(ctx, b, string(appendString(ctx, []byte{}, ptrToString(p))))
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyStringPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyStringPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendString(ctx, b, string(appendString(ctx, []byte{}, ptrToString(p))))
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructPtrHeadBool:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadBool:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = appendBool(ctx, b, ptrToBool(p+uintptr(code.Offset)))
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyBool:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyBool:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			v := ptrToBool(p + uintptr(code.Offset))
+			if v {
+				b = appendStructKey(ctx, code, b)
+				b = appendBool(ctx, b, v)
+				b = appendComma(ctx, b)
+				code = code.Next
+			} else {
+				code = code.NextField
+			}
+		case encoder.OpStructPtrHeadBoolString:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadBoolString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			b = appendBool(ctx, b, ptrToBool(p+uintptr(code.Offset)))
+			b = append(b, '"')
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyBoolString:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyBoolString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			v := ptrToBool(p + uintptr(code.Offset))
+			if v {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendBool(ctx, b, v)
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+				code = code.Next
+			} else {
+				code = code.NextField
+			}
+		case encoder.OpStructPtrHeadBoolPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadBoolPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendBool(ctx, b, ptrToBool(p))
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyBoolPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyBoolPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendBool(ctx, b, ptrToBool(p))
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructPtrHeadBoolPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadBoolPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = append(b, '"')
+				b = appendBool(ctx, b, ptrToBool(p))
+				b = append(b, '"')
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyBoolPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyBoolPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendBool(ctx, b, ptrToBool(p))
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructPtrHeadBytes:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadBytes:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = appendByteSlice(ctx, b, ptrToBytes(p+uintptr(code.Offset)))
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyBytes:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyBytes:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			v := ptrToBytes(p + uintptr(code.Offset))
+			if len(v) == 0 {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				b = appendByteSlice(ctx, b, v)
+				b = appendComma(ctx, b)
+				code = code.Next
+			}
+		case encoder.OpStructPtrHeadBytesPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadBytesPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendByteSlice(ctx, b, ptrToBytes(p))
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyBytesPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyBytesPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendByteSlice(ctx, b, ptrToBytes(p))
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructPtrHeadNumber:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadNumber:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			bb, err := appendNumber(ctx, b, ptrToNumber(p+uintptr(code.Offset)))
+			if err != nil {
+				return nil, err
+			}
+			b = appendComma(ctx, bb)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyNumber:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyNumber:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			v := ptrToNumber(p + uintptr(code.Offset))
+			if v == "" {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				bb, err := appendNumber(ctx, b, v)
+				if err != nil {
+					return nil, err
+				}
+				b = appendComma(ctx, bb)
+				code = code.Next
+			}
+		case encoder.OpStructPtrHeadNumberString:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadNumberString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			bb, err := appendNumber(ctx, b, ptrToNumber(p+uintptr(code.Offset)))
+			if err != nil {
+				return nil, err
+			}
+			b = append(bb, '"')
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyNumberString:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyNumberString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			v := ptrToNumber(p + uintptr(code.Offset))
+			if v == "" {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				bb, err := appendNumber(ctx, b, v)
+				if err != nil {
+					return nil, err
+				}
+				b = append(bb, '"')
+				b = appendComma(ctx, b)
+				code = code.Next
+			}
+		case encoder.OpStructPtrHeadNumberPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadNumberPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				bb, err := appendNumber(ctx, b, ptrToNumber(p))
+				if err != nil {
+					return nil, err
+				}
+				b = bb
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyNumberPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyNumberPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				bb, err := appendNumber(ctx, b, ptrToNumber(p))
+				if err != nil {
+					return nil, err
+				}
+				b = appendComma(ctx, bb)
+			}
+			code = code.Next
+		case encoder.OpStructPtrHeadNumberPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadNumberPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = append(b, '"')
+				bb, err := appendNumber(ctx, b, ptrToNumber(p))
+				if err != nil {
+					return nil, err
+				}
+				b = append(bb, '"')
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyNumberPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyNumberPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				bb, err := appendNumber(ctx, b, ptrToNumber(p))
+				if err != nil {
+					return nil, err
+				}
+				b = append(bb, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructPtrHeadArray, encoder.OpStructPtrHeadSlice:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadArray, encoder.OpStructHeadSlice:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			p += uintptr(code.Offset)
+			code = code.Next
+			store(ctxptr, code.Idx, p)
+		case encoder.OpStructPtrHeadOmitEmptyArray:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyArray:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			p += uintptr(code.Offset)
+			b = appendStructKey(ctx, code, b)
+			code = code.Next
+			store(ctxptr, code.Idx, p)
+		case encoder.OpStructPtrHeadOmitEmptySlice:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptySlice:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			p += uintptr(code.Offset)
+			slice := ptrToSlice(p)
+			if slice.Len == 0 {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				code = code.Next
+				store(ctxptr, code.Idx, p)
+			}
+		case encoder.OpStructPtrHeadArrayPtr, encoder.OpStructPtrHeadSlicePtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadArrayPtr, encoder.OpStructHeadSlicePtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.NextField
+			} else {
+				code = code.Next
+				store(ctxptr, code.Idx, p)
+			}
+		case encoder.OpStructPtrHeadOmitEmptyArrayPtr, encoder.OpStructPtrHeadOmitEmptySlicePtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyArrayPtr, encoder.OpStructHeadOmitEmptySlicePtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				code = code.Next
+				store(ctxptr, code.Idx, p)
+			}
+		case encoder.OpStructPtrHeadMap:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadMap:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if p != 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				p = ptrToPtr(p + uintptr(code.Offset))
+			}
+			code = code.Next
+			store(ctxptr, code.Idx, p)
+		case encoder.OpStructPtrHeadOmitEmptyMap:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyMap:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if p != 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				p = ptrToPtr(p + uintptr(code.Offset))
+			}
+			if maplen(ptrToUnsafePtr(p)) == 0 {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				code = code.Next
+				store(ctxptr, code.Idx, p)
+			}
+		case encoder.OpStructPtrHeadMapPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadMapPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.NextField
+				break
+			}
+			p = ptrToPtr(p + uintptr(code.Offset))
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.NextField
+			} else {
+				if (code.Flags & encoder.IndirectFlags) != 0 {
+					p = ptrToNPtr(p, code.PtrNum)
+				}
+				code = code.Next
+				store(ctxptr, code.Idx, p)
+			}
+		case encoder.OpStructPtrHeadOmitEmptyMapPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyMapPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if p == 0 {
+				code = code.NextField
+				break
+			}
+			p = ptrToPtr(p + uintptr(code.Offset))
+			if p == 0 {
+				code = code.NextField
+			} else {
+				if (code.Flags & encoder.IndirectFlags) != 0 {
+					p = ptrToNPtr(p, code.PtrNum)
+				}
+				b = appendStructKey(ctx, code, b)
+				code = code.Next
+				store(ctxptr, code.Idx, p)
+			}
+		case encoder.OpStructPtrHeadMarshalJSON:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadMarshalJSON:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			p += uintptr(code.Offset)
+			if (code.Flags & encoder.IsNilableTypeFlags) != 0 {
+				if (code.Flags&encoder.IndirectFlags) != 0 || code.Op == encoder.OpStructPtrHeadMarshalJSON {
+					p = ptrToPtr(p)
+				}
+			}
+			if p == 0 && (code.Flags&encoder.NilCheckFlags) != 0 {
+				b = appendNull(ctx, b)
+			} else {
+				bb, err := appendMarshalJSON(ctx, code, b, ptrToInterface(code, p))
+				if err != nil {
+					return nil, err
+				}
+				b = bb
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyMarshalJSON:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyMarshalJSON:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			p += uintptr(code.Offset)
+			if (code.Flags & encoder.IsNilableTypeFlags) != 0 {
+				if (code.Flags&encoder.IndirectFlags) != 0 || code.Op == encoder.OpStructPtrHeadOmitEmptyMarshalJSON {
+					p = ptrToPtr(p)
+				}
+			}
+			iface := ptrToInterface(code, p)
+			if (code.Flags&encoder.NilCheckFlags) != 0 && encoder.IsNilForMarshaler(iface) {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				bb, err := appendMarshalJSON(ctx, code, b, iface)
+				if err != nil {
+					return nil, err
+				}
+				b = bb
+				b = appendComma(ctx, b)
+				code = code.Next
+			}
+		case encoder.OpStructPtrHeadMarshalJSONPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadMarshalJSONPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				bb, err := appendMarshalJSON(ctx, code, b, ptrToInterface(code, p))
+				if err != nil {
+					return nil, err
+				}
+				b = bb
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyMarshalJSONPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyMarshalJSONPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if p == 0 {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				bb, err := appendMarshalJSON(ctx, code, b, ptrToInterface(code, p))
+				if err != nil {
+					return nil, err
+				}
+				b = bb
+				b = appendComma(ctx, b)
+				code = code.Next
+			}
+		case encoder.OpStructPtrHeadMarshalText:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadMarshalText:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			p += uintptr(code.Offset)
+			if (code.Flags & encoder.IsNilableTypeFlags) != 0 {
+				if (code.Flags&encoder.IndirectFlags) != 0 || code.Op == encoder.OpStructPtrHeadMarshalText {
+					p = ptrToPtr(p)
+				}
+			}
+			if p == 0 && (code.Flags&encoder.NilCheckFlags) != 0 {
+				b = appendNull(ctx, b)
+			} else {
+				bb, err := appendMarshalText(ctx, code, b, ptrToInterface(code, p))
+				if err != nil {
+					return nil, err
+				}
+				b = bb
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyMarshalText:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyMarshalText:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			p += uintptr(code.Offset)
+			if (code.Flags & encoder.IsNilableTypeFlags) != 0 {
+				if (code.Flags&encoder.IndirectFlags) != 0 || code.Op == encoder.OpStructPtrHeadOmitEmptyMarshalText {
+					p = ptrToPtr(p)
+				}
+			}
+			if p == 0 && (code.Flags&encoder.NilCheckFlags) != 0 {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				bb, err := appendMarshalText(ctx, code, b, ptrToInterface(code, p))
+				if err != nil {
+					return nil, err
+				}
+				b = bb
+				b = appendComma(ctx, b)
+				code = code.Next
+			}
+		case encoder.OpStructPtrHeadMarshalTextPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadMarshalTextPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				bb, err := appendMarshalText(ctx, code, b, ptrToInterface(code, p))
+				if err != nil {
+					return nil, err
+				}
+				b = bb
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyMarshalTextPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyMarshalTextPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if p == 0 {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				bb, err := appendMarshalText(ctx, code, b, ptrToInterface(code, p))
+				if err != nil {
+					return nil, err
+				}
+				b = bb
+				b = appendComma(ctx, b)
+				code = code.Next
+			}
+		case encoder.OpStructField:
+			if code.Flags&encoder.IsTaggedKeyFlags != 0 || code.Flags&encoder.AnonymousKeyFlags == 0 {
+				b = appendStructKey(ctx, code, b)
+			}
+			p := load(ctxptr, code.Idx) + uintptr(code.Offset)
+			code = code.Next
+			store(ctxptr, code.Idx, p)
+		case encoder.OpStructFieldOmitEmpty:
+			p := load(ctxptr, code.Idx)
+			p += uintptr(code.Offset)
+			if ptrToPtr(p) == 0 && (code.Flags&encoder.IsNextOpPtrTypeFlags) != 0 {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				code = code.Next
+				store(ctxptr, code.Idx, p)
+			}
+		case encoder.OpStructFieldInt:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = appendInt(ctx, b, p+uintptr(code.Offset), code)
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyInt:
+			p := load(ctxptr, code.Idx)
+			u64 := ptrToUint64(p+uintptr(code.Offset), code.NumBitSize)
+			v := u64 & ((1 << code.NumBitSize) - 1)
+			if v != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendInt(ctx, b, p+uintptr(code.Offset), code)
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldIntString:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			b = appendInt(ctx, b, p+uintptr(code.Offset), code)
+			b = append(b, '"')
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyIntString:
+			p := load(ctxptr, code.Idx)
+			u64 := ptrToUint64(p+uintptr(code.Offset), code.NumBitSize)
+			v := u64 & ((1 << code.NumBitSize) - 1)
+			if v != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendInt(ctx, b, p+uintptr(code.Offset), code)
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldIntPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			b = appendStructKey(ctx, code, b)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendInt(ctx, b, p, code)
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyIntPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendInt(ctx, b, p, code)
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldIntPtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			b = appendStructKey(ctx, code, b)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = append(b, '"')
+				b = appendInt(ctx, b, p, code)
+				b = append(b, '"')
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyIntPtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendInt(ctx, b, p, code)
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldUint:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = appendUint(ctx, b, p+uintptr(code.Offset), code)
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyUint:
+			p := load(ctxptr, code.Idx)
+			u64 := ptrToUint64(p+uintptr(code.Offset), code.NumBitSize)
+			v := u64 & ((1 << code.NumBitSize) - 1)
+			if v != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendUint(ctx, b, p+uintptr(code.Offset), code)
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldUintString:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			b = appendUint(ctx, b, p+uintptr(code.Offset), code)
+			b = append(b, '"')
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyUintString:
+			p := load(ctxptr, code.Idx)
+			u64 := ptrToUint64(p+uintptr(code.Offset), code.NumBitSize)
+			v := u64 & ((1 << code.NumBitSize) - 1)
+			if v != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendUint(ctx, b, p+uintptr(code.Offset), code)
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldUintPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			b = appendStructKey(ctx, code, b)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendUint(ctx, b, p, code)
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyUintPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendUint(ctx, b, p, code)
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldUintPtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			b = appendStructKey(ctx, code, b)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = append(b, '"')
+				b = appendUint(ctx, b, p, code)
+				b = append(b, '"')
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyUintPtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendUint(ctx, b, p, code)
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldFloat32:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = appendFloat32(ctx, b, ptrToFloat32(p+uintptr(code.Offset)))
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyFloat32:
+			p := load(ctxptr, code.Idx)
+			v := ptrToFloat32(p + uintptr(code.Offset))
+			if v != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendFloat32(ctx, b, v)
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldFloat32String:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			b = appendFloat32(ctx, b, ptrToFloat32(p+uintptr(code.Offset)))
+			b = append(b, '"')
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyFloat32String:
+			p := load(ctxptr, code.Idx)
+			v := ptrToFloat32(p + uintptr(code.Offset))
+			if v != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendFloat32(ctx, b, v)
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldFloat32Ptr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			b = appendStructKey(ctx, code, b)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendFloat32(ctx, b, ptrToFloat32(p))
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyFloat32Ptr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendFloat32(ctx, b, ptrToFloat32(p))
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldFloat32PtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			b = appendStructKey(ctx, code, b)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = append(b, '"')
+				b = appendFloat32(ctx, b, ptrToFloat32(p))
+				b = append(b, '"')
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyFloat32PtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendFloat32(ctx, b, ptrToFloat32(p))
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldFloat64:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			v := ptrToFloat64(p + uintptr(code.Offset))
+			if math.IsInf(v, 0) || math.IsNaN(v) {
+				return nil, errUnsupportedFloat(v)
+			}
+			b = appendFloat64(ctx, b, v)
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyFloat64:
+			p := load(ctxptr, code.Idx)
+			v := ptrToFloat64(p + uintptr(code.Offset))
+			if v != 0 {
+				if math.IsInf(v, 0) || math.IsNaN(v) {
+					return nil, errUnsupportedFloat(v)
+				}
+				b = appendStructKey(ctx, code, b)
+				b = appendFloat64(ctx, b, v)
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldFloat64String:
+			p := load(ctxptr, code.Idx)
+			v := ptrToFloat64(p + uintptr(code.Offset))
+			if math.IsInf(v, 0) || math.IsNaN(v) {
+				return nil, errUnsupportedFloat(v)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			b = appendFloat64(ctx, b, v)
+			b = append(b, '"')
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyFloat64String:
+			p := load(ctxptr, code.Idx)
+			v := ptrToFloat64(p + uintptr(code.Offset))
+			if v != 0 {
+				if math.IsInf(v, 0) || math.IsNaN(v) {
+					return nil, errUnsupportedFloat(v)
+				}
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendFloat64(ctx, b, v)
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldFloat64Ptr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			b = appendStructKey(ctx, code, b)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.Next
+				break
+			}
+			v := ptrToFloat64(p)
+			if math.IsInf(v, 0) || math.IsNaN(v) {
+				return nil, errUnsupportedFloat(v)
+			}
+			b = appendFloat64(ctx, b, v)
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyFloat64Ptr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				v := ptrToFloat64(p)
+				if math.IsInf(v, 0) || math.IsNaN(v) {
+					return nil, errUnsupportedFloat(v)
+				}
+				b = appendFloat64(ctx, b, v)
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldFloat64PtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			b = appendStructKey(ctx, code, b)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				v := ptrToFloat64(p)
+				if math.IsInf(v, 0) || math.IsNaN(v) {
+					return nil, errUnsupportedFloat(v)
+				}
+				b = append(b, '"')
+				b = appendFloat64(ctx, b, v)
+				b = append(b, '"')
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyFloat64PtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				v := ptrToFloat64(p)
+				if math.IsInf(v, 0) || math.IsNaN(v) {
+					return nil, errUnsupportedFloat(v)
+				}
+				b = appendFloat64(ctx, b, v)
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldString:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = appendString(ctx, b, ptrToString(p+uintptr(code.Offset)))
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyString:
+			p := load(ctxptr, code.Idx)
+			v := ptrToString(p + uintptr(code.Offset))
+			if v != "" {
+				b = appendStructKey(ctx, code, b)
+				b = appendString(ctx, b, v)
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldStringString:
+			p := load(ctxptr, code.Idx)
+			s := ptrToString(p + uintptr(code.Offset))
+			b = appendStructKey(ctx, code, b)
+			b = appendString(ctx, b, string(appendString(ctx, []byte{}, s)))
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyStringString:
+			p := load(ctxptr, code.Idx)
+			v := ptrToString(p + uintptr(code.Offset))
+			if v != "" {
+				b = appendStructKey(ctx, code, b)
+				b = appendString(ctx, b, string(appendString(ctx, []byte{}, v)))
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldStringPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			b = appendStructKey(ctx, code, b)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendString(ctx, b, ptrToString(p))
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyStringPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendString(ctx, b, ptrToString(p))
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldStringPtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			b = appendStructKey(ctx, code, b)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendString(ctx, b, string(appendString(ctx, []byte{}, ptrToString(p))))
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyStringPtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendString(ctx, b, string(appendString(ctx, []byte{}, ptrToString(p))))
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldBool:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = appendBool(ctx, b, ptrToBool(p+uintptr(code.Offset)))
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyBool:
+			p := load(ctxptr, code.Idx)
+			v := ptrToBool(p + uintptr(code.Offset))
+			if v {
+				b = appendStructKey(ctx, code, b)
+				b = appendBool(ctx, b, v)
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldBoolString:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			b = appendBool(ctx, b, ptrToBool(p+uintptr(code.Offset)))
+			b = append(b, '"')
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyBoolString:
+			p := load(ctxptr, code.Idx)
+			v := ptrToBool(p + uintptr(code.Offset))
+			if v {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendBool(ctx, b, v)
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldBoolPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			b = appendStructKey(ctx, code, b)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendBool(ctx, b, ptrToBool(p))
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyBoolPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendBool(ctx, b, ptrToBool(p))
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldBoolPtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			b = appendStructKey(ctx, code, b)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = append(b, '"')
+				b = appendBool(ctx, b, ptrToBool(p))
+				b = append(b, '"')
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyBoolPtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendBool(ctx, b, ptrToBool(p))
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldBytes:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = appendByteSlice(ctx, b, ptrToBytes(p+uintptr(code.Offset)))
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyBytes:
+			p := load(ctxptr, code.Idx)
+			v := ptrToBytes(p + uintptr(code.Offset))
+			if len(v) > 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendByteSlice(ctx, b, v)
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldBytesPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			b = appendStructKey(ctx, code, b)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendByteSlice(ctx, b, ptrToBytes(p))
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyBytesPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendByteSlice(ctx, b, ptrToBytes(p))
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldNumber:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			bb, err := appendNumber(ctx, b, ptrToNumber(p+uintptr(code.Offset)))
+			if err != nil {
+				return nil, err
+			}
+			b = appendComma(ctx, bb)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyNumber:
+			p := load(ctxptr, code.Idx)
+			v := ptrToNumber(p + uintptr(code.Offset))
+			if v != "" {
+				b = appendStructKey(ctx, code, b)
+				bb, err := appendNumber(ctx, b, v)
+				if err != nil {
+					return nil, err
+				}
+				b = appendComma(ctx, bb)
+			}
+			code = code.Next
+		case encoder.OpStructFieldNumberString:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			bb, err := appendNumber(ctx, b, ptrToNumber(p+uintptr(code.Offset)))
+			if err != nil {
+				return nil, err
+			}
+			b = append(bb, '"')
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyNumberString:
+			p := load(ctxptr, code.Idx)
+			v := ptrToNumber(p + uintptr(code.Offset))
+			if v != "" {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				bb, err := appendNumber(ctx, b, v)
+				if err != nil {
+					return nil, err
+				}
+				b = append(bb, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldNumberPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			b = appendStructKey(ctx, code, b)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				bb, err := appendNumber(ctx, b, ptrToNumber(p))
+				if err != nil {
+					return nil, err
+				}
+				b = bb
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyNumberPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				bb, err := appendNumber(ctx, b, ptrToNumber(p))
+				if err != nil {
+					return nil, err
+				}
+				b = appendComma(ctx, bb)
+			}
+			code = code.Next
+		case encoder.OpStructFieldNumberPtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			b = appendStructKey(ctx, code, b)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = append(b, '"')
+				bb, err := appendNumber(ctx, b, ptrToNumber(p))
+				if err != nil {
+					return nil, err
+				}
+				b = append(bb, '"')
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyNumberPtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				bb, err := appendNumber(ctx, b, ptrToNumber(p))
+				if err != nil {
+					return nil, err
+				}
+				b = append(bb, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldMarshalJSON:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			p += uintptr(code.Offset)
+			if (code.Flags & encoder.IsNilableTypeFlags) != 0 {
+				p = ptrToPtr(p)
+			}
+			if p == 0 && (code.Flags&encoder.NilCheckFlags) != 0 {
+				b = appendNull(ctx, b)
+			} else {
+				bb, err := appendMarshalJSON(ctx, code, b, ptrToInterface(code, p))
+				if err != nil {
+					return nil, err
+				}
+				b = bb
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyMarshalJSON:
+			p := load(ctxptr, code.Idx)
+			p += uintptr(code.Offset)
+			if (code.Flags & encoder.IsNilableTypeFlags) != 0 {
+				p = ptrToPtr(p)
+			}
+			if p == 0 && (code.Flags&encoder.NilCheckFlags) != 0 {
+				code = code.NextField
+				break
+			}
+			iface := ptrToInterface(code, p)
+			if (code.Flags&encoder.NilCheckFlags) != 0 && encoder.IsNilForMarshaler(iface) {
+				code = code.NextField
+				break
+			}
+			b = appendStructKey(ctx, code, b)
+			bb, err := appendMarshalJSON(ctx, code, b, iface)
+			if err != nil {
+				return nil, err
+			}
+			b = appendComma(ctx, bb)
+			code = code.Next
+		case encoder.OpStructFieldMarshalJSONPtr:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				bb, err := appendMarshalJSON(ctx, code, b, ptrToInterface(code, p))
+				if err != nil {
+					return nil, err
+				}
+				b = bb
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyMarshalJSONPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				bb, err := appendMarshalJSON(ctx, code, b, ptrToInterface(code, p))
+				if err != nil {
+					return nil, err
+				}
+				b = appendComma(ctx, bb)
+			}
+			code = code.Next
+		case encoder.OpStructFieldMarshalText:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			p += uintptr(code.Offset)
+			if (code.Flags & encoder.IsNilableTypeFlags) != 0 {
+				p = ptrToPtr(p)
+			}
+			if p == 0 && (code.Flags&encoder.NilCheckFlags) != 0 {
+				b = appendNull(ctx, b)
+			} else {
+				bb, err := appendMarshalText(ctx, code, b, ptrToInterface(code, p))
+				if err != nil {
+					return nil, err
+				}
+				b = bb
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyMarshalText:
+			p := load(ctxptr, code.Idx)
+			p += uintptr(code.Offset)
+			if (code.Flags & encoder.IsNilableTypeFlags) != 0 {
+				p = ptrToPtr(p)
+			}
+			if p == 0 && (code.Flags&encoder.NilCheckFlags) != 0 {
+				code = code.NextField
+				break
+			}
+			b = appendStructKey(ctx, code, b)
+			bb, err := appendMarshalText(ctx, code, b, ptrToInterface(code, p))
+			if err != nil {
+				return nil, err
+			}
+			b = appendComma(ctx, bb)
+			code = code.Next
+		case encoder.OpStructFieldMarshalTextPtr:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				bb, err := appendMarshalText(ctx, code, b, ptrToInterface(code, p))
+				if err != nil {
+					return nil, err
+				}
+				b = bb
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyMarshalTextPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				bb, err := appendMarshalText(ctx, code, b, ptrToInterface(code, p))
+				if err != nil {
+					return nil, err
+				}
+				b = appendComma(ctx, bb)
+			}
+			code = code.Next
+		case encoder.OpStructFieldArray:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p += uintptr(code.Offset)
+			code = code.Next
+			store(ctxptr, code.Idx, p)
+		case encoder.OpStructFieldOmitEmptyArray:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p += uintptr(code.Offset)
+			code = code.Next
+			store(ctxptr, code.Idx, p)
+		case encoder.OpStructFieldArrayPtr:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			code = code.Next
+			store(ctxptr, code.Idx, p)
+		case encoder.OpStructFieldOmitEmptyArrayPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				code = code.Next
+				store(ctxptr, code.Idx, p)
+			} else {
+				code = code.NextField
+			}
+		case encoder.OpStructFieldSlice:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p += uintptr(code.Offset)
+			code = code.Next
+			store(ctxptr, code.Idx, p)
+		case encoder.OpStructFieldOmitEmptySlice:
+			p := load(ctxptr, code.Idx)
+			p += uintptr(code.Offset)
+			slice := ptrToSlice(p)
+			if slice.Len == 0 {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				code = code.Next
+				store(ctxptr, code.Idx, p)
+			}
+		case encoder.OpStructFieldSlicePtr:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			code = code.Next
+			store(ctxptr, code.Idx, p)
+		case encoder.OpStructFieldOmitEmptySlicePtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				code = code.Next
+				store(ctxptr, code.Idx, p)
+			} else {
+				code = code.NextField
+			}
+		case encoder.OpStructFieldMap:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToPtr(p + uintptr(code.Offset))
+			code = code.Next
+			store(ctxptr, code.Idx, p)
+		case encoder.OpStructFieldOmitEmptyMap:
+			p := load(ctxptr, code.Idx)
+			p = ptrToPtr(p + uintptr(code.Offset))
+			if p == 0 || maplen(ptrToUnsafePtr(p)) == 0 {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				code = code.Next
+				store(ctxptr, code.Idx, p)
+			}
+		case encoder.OpStructFieldMapPtr:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToPtr(p + uintptr(code.Offset))
+			if p != 0 {
+				p = ptrToNPtr(p, code.PtrNum)
+			}
+			code = code.Next
+			store(ctxptr, code.Idx, p)
+		case encoder.OpStructFieldOmitEmptyMapPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToPtr(p + uintptr(code.Offset))
+			if p != 0 {
+				p = ptrToNPtr(p, code.PtrNum)
+			}
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				code = code.Next
+				store(ctxptr, code.Idx, p)
+			} else {
+				code = code.NextField
+			}
+		case encoder.OpStructFieldStruct:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p += uintptr(code.Offset)
+			code = code.Next
+			store(ctxptr, code.Idx, p)
+		case encoder.OpStructFieldOmitEmptyStruct:
+			p := load(ctxptr, code.Idx)
+			p += uintptr(code.Offset)
+			if ptrToPtr(p) == 0 && (code.Flags&encoder.IsNextOpPtrTypeFlags) != 0 {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				code = code.Next
+				store(ctxptr, code.Idx, p)
+			}
+		case encoder.OpStructEnd:
+			b = appendStructEndSkipLast(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndInt:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = appendInt(ctx, b, p+uintptr(code.Offset), code)
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyInt:
+			p := load(ctxptr, code.Idx)
+			u64 := ptrToUint64(p+uintptr(code.Offset), code.NumBitSize)
+			v := u64 & ((1 << code.NumBitSize) - 1)
+			if v != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendInt(ctx, b, p+uintptr(code.Offset), code)
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndIntString:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			b = appendInt(ctx, b, p+uintptr(code.Offset), code)
+			b = append(b, '"')
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyIntString:
+			p := load(ctxptr, code.Idx)
+			u64 := ptrToUint64(p+uintptr(code.Offset), code.NumBitSize)
+			v := u64 & ((1 << code.NumBitSize) - 1)
+			if v != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendInt(ctx, b, p+uintptr(code.Offset), code)
+				b = append(b, '"')
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndIntPtr:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendInt(ctx, b, p, code)
+			}
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyIntPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendInt(ctx, b, p, code)
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndIntPtrString:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = append(b, '"')
+				b = appendInt(ctx, b, p, code)
+				b = append(b, '"')
+			}
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyIntPtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendInt(ctx, b, p, code)
+				b = append(b, '"')
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndUint:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = appendUint(ctx, b, p+uintptr(code.Offset), code)
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyUint:
+			p := load(ctxptr, code.Idx)
+			u64 := ptrToUint64(p+uintptr(code.Offset), code.NumBitSize)
+			v := u64 & ((1 << code.NumBitSize) - 1)
+			if v != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendUint(ctx, b, p+uintptr(code.Offset), code)
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndUintString:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			b = appendUint(ctx, b, p+uintptr(code.Offset), code)
+			b = append(b, '"')
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyUintString:
+			p := load(ctxptr, code.Idx)
+			u64 := ptrToUint64(p+uintptr(code.Offset), code.NumBitSize)
+			v := u64 & ((1 << code.NumBitSize) - 1)
+			if v != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendUint(ctx, b, p+uintptr(code.Offset), code)
+				b = append(b, '"')
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndUintPtr:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendUint(ctx, b, p, code)
+			}
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyUintPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendUint(ctx, b, p, code)
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndUintPtrString:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = append(b, '"')
+				b = appendUint(ctx, b, p, code)
+				b = append(b, '"')
+			}
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyUintPtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendUint(ctx, b, p, code)
+				b = append(b, '"')
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndFloat32:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = appendFloat32(ctx, b, ptrToFloat32(p+uintptr(code.Offset)))
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyFloat32:
+			p := load(ctxptr, code.Idx)
+			v := ptrToFloat32(p + uintptr(code.Offset))
+			if v != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendFloat32(ctx, b, v)
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndFloat32String:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			b = appendFloat32(ctx, b, ptrToFloat32(p+uintptr(code.Offset)))
+			b = append(b, '"')
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyFloat32String:
+			p := load(ctxptr, code.Idx)
+			v := ptrToFloat32(p + uintptr(code.Offset))
+			if v != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendFloat32(ctx, b, v)
+				b = append(b, '"')
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndFloat32Ptr:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendFloat32(ctx, b, ptrToFloat32(p))
+			}
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyFloat32Ptr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendFloat32(ctx, b, ptrToFloat32(p))
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndFloat32PtrString:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = append(b, '"')
+				b = appendFloat32(ctx, b, ptrToFloat32(p))
+				b = append(b, '"')
+			}
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyFloat32PtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendFloat32(ctx, b, ptrToFloat32(p))
+				b = append(b, '"')
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndFloat64:
+			p := load(ctxptr, code.Idx)
+			v := ptrToFloat64(p + uintptr(code.Offset))
+			if math.IsInf(v, 0) || math.IsNaN(v) {
+				return nil, errUnsupportedFloat(v)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = appendFloat64(ctx, b, v)
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyFloat64:
+			p := load(ctxptr, code.Idx)
+			v := ptrToFloat64(p + uintptr(code.Offset))
+			if v != 0 {
+				if math.IsInf(v, 0) || math.IsNaN(v) {
+					return nil, errUnsupportedFloat(v)
+				}
+				b = appendStructKey(ctx, code, b)
+				b = appendFloat64(ctx, b, v)
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndFloat64String:
+			p := load(ctxptr, code.Idx)
+			v := ptrToFloat64(p + uintptr(code.Offset))
+			if math.IsInf(v, 0) || math.IsNaN(v) {
+				return nil, errUnsupportedFloat(v)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			b = appendFloat64(ctx, b, v)
+			b = append(b, '"')
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyFloat64String:
+			p := load(ctxptr, code.Idx)
+			v := ptrToFloat64(p + uintptr(code.Offset))
+			if v != 0 {
+				if math.IsInf(v, 0) || math.IsNaN(v) {
+					return nil, errUnsupportedFloat(v)
+				}
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendFloat64(ctx, b, v)
+				b = append(b, '"')
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndFloat64Ptr:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+				b = appendStructEnd(ctx, code, b)
+				code = code.Next
+				break
+			}
+			v := ptrToFloat64(p)
+			if math.IsInf(v, 0) || math.IsNaN(v) {
+				return nil, errUnsupportedFloat(v)
+			}
+			b = appendFloat64(ctx, b, v)
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyFloat64Ptr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				v := ptrToFloat64(p)
+				if math.IsInf(v, 0) || math.IsNaN(v) {
+					return nil, errUnsupportedFloat(v)
+				}
+				b = appendFloat64(ctx, b, v)
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndFloat64PtrString:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = append(b, '"')
+				v := ptrToFloat64(p)
+				if math.IsInf(v, 0) || math.IsNaN(v) {
+					return nil, errUnsupportedFloat(v)
+				}
+				b = appendFloat64(ctx, b, v)
+				b = append(b, '"')
+			}
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyFloat64PtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				v := ptrToFloat64(p)
+				if math.IsInf(v, 0) || math.IsNaN(v) {
+					return nil, errUnsupportedFloat(v)
+				}
+				b = append(b, '"')
+				b = appendFloat64(ctx, b, v)
+				b = append(b, '"')
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndString:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = appendString(ctx, b, ptrToString(p+uintptr(code.Offset)))
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyString:
+			p := load(ctxptr, code.Idx)
+			v := ptrToString(p + uintptr(code.Offset))
+			if v != "" {
+				b = appendStructKey(ctx, code, b)
+				b = appendString(ctx, b, v)
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndStringString:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			s := ptrToString(p + uintptr(code.Offset))
+			b = appendString(ctx, b, string(appendString(ctx, []byte{}, s)))
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyStringString:
+			p := load(ctxptr, code.Idx)
+			v := ptrToString(p + uintptr(code.Offset))
+			if v != "" {
+				b = appendStructKey(ctx, code, b)
+				b = appendString(ctx, b, string(appendString(ctx, []byte{}, v)))
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndStringPtr:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendString(ctx, b, ptrToString(p))
+			}
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyStringPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendString(ctx, b, ptrToString(p))
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndStringPtrString:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendString(ctx, b, string(appendString(ctx, []byte{}, ptrToString(p))))
+			}
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyStringPtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendString(ctx, b, string(appendString(ctx, []byte{}, ptrToString(p))))
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndBool:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = appendBool(ctx, b, ptrToBool(p+uintptr(code.Offset)))
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyBool:
+			p := load(ctxptr, code.Idx)
+			v := ptrToBool(p + uintptr(code.Offset))
+			if v {
+				b = appendStructKey(ctx, code, b)
+				b = appendBool(ctx, b, v)
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndBoolString:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			b = appendBool(ctx, b, ptrToBool(p+uintptr(code.Offset)))
+			b = append(b, '"')
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyBoolString:
+			p := load(ctxptr, code.Idx)
+			v := ptrToBool(p + uintptr(code.Offset))
+			if v {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendBool(ctx, b, v)
+				b = append(b, '"')
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndBoolPtr:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendBool(ctx, b, ptrToBool(p))
+			}
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyBoolPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendBool(ctx, b, ptrToBool(p))
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndBoolPtrString:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = append(b, '"')
+				b = appendBool(ctx, b, ptrToBool(p))
+				b = append(b, '"')
+			}
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyBoolPtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendBool(ctx, b, ptrToBool(p))
+				b = append(b, '"')
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndBytes:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = appendByteSlice(ctx, b, ptrToBytes(p+uintptr(code.Offset)))
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyBytes:
+			p := load(ctxptr, code.Idx)
+			v := ptrToBytes(p + uintptr(code.Offset))
+			if len(v) > 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendByteSlice(ctx, b, v)
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndBytesPtr:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendByteSlice(ctx, b, ptrToBytes(p))
+			}
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyBytesPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendByteSlice(ctx, b, ptrToBytes(p))
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndNumber:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			bb, err := appendNumber(ctx, b, ptrToNumber(p+uintptr(code.Offset)))
+			if err != nil {
+				return nil, err
+			}
+			b = appendStructEnd(ctx, code, bb)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyNumber:
+			p := load(ctxptr, code.Idx)
+			v := ptrToNumber(p + uintptr(code.Offset))
+			if v != "" {
+				b = appendStructKey(ctx, code, b)
+				bb, err := appendNumber(ctx, b, v)
+				if err != nil {
+					return nil, err
+				}
+				b = appendStructEnd(ctx, code, bb)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndNumberString:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			bb, err := appendNumber(ctx, b, ptrToNumber(p+uintptr(code.Offset)))
+			if err != nil {
+				return nil, err
+			}
+			b = append(bb, '"')
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyNumberString:
+			p := load(ctxptr, code.Idx)
+			v := ptrToNumber(p + uintptr(code.Offset))
+			if v != "" {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				bb, err := appendNumber(ctx, b, v)
+				if err != nil {
+					return nil, err
+				}
+				b = append(bb, '"')
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndNumberPtr:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				bb, err := appendNumber(ctx, b, ptrToNumber(p))
+				if err != nil {
+					return nil, err
+				}
+				b = bb
+			}
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyNumberPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				bb, err := appendNumber(ctx, b, ptrToNumber(p))
+				if err != nil {
+					return nil, err
+				}
+				b = appendStructEnd(ctx, code, bb)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndNumberPtrString:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = append(b, '"')
+				bb, err := appendNumber(ctx, b, ptrToNumber(p))
+				if err != nil {
+					return nil, err
+				}
+				b = append(bb, '"')
+			}
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyNumberPtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				bb, err := appendNumber(ctx, b, ptrToNumber(p))
+				if err != nil {
+					return nil, err
+				}
+				b = append(bb, '"')
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpEnd:
+			goto END
+		}
+	}
+END:
+	return b, nil
+}
diff --git a/vendor/github.com/goccy/go-json/internal/encoder/vm_color/debug_vm.go b/vendor/github.com/goccy/go-json/internal/encoder/vm_color/debug_vm.go
new file mode 100644
index 00000000..925f61ed
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/internal/encoder/vm_color/debug_vm.go
@@ -0,0 +1,35 @@
+package vm_color
+
+import (
+	"fmt"
+
+	"github.com/goccy/go-json/internal/encoder"
+)
+
+func DebugRun(ctx *encoder.RuntimeContext, b []byte, codeSet *encoder.OpcodeSet) ([]byte, error) {
+	var code *encoder.Opcode
+	if (ctx.Option.Flag & encoder.HTMLEscapeOption) != 0 {
+		code = codeSet.EscapeKeyCode
+	} else {
+		code = codeSet.NoescapeKeyCode
+	}
+
+	defer func() {
+		if err := recover(); err != nil {
+			w := ctx.Option.DebugOut
+			fmt.Fprintln(w, "=============[DEBUG]===============")
+			fmt.Fprintln(w, "* [TYPE]")
+			fmt.Fprintln(w, codeSet.Type)
+			fmt.Fprintf(w, "\n")
+			fmt.Fprintln(w, "* [ALL OPCODE]")
+			fmt.Fprintln(w, code.Dump())
+			fmt.Fprintf(w, "\n")
+			fmt.Fprintln(w, "* [CONTEXT]")
+			fmt.Fprintf(w, "%+v\n", ctx)
+			fmt.Fprintln(w, "===================================")
+			panic(err)
+		}
+	}()
+
+	return Run(ctx, b, codeSet)
+}
diff --git a/vendor/github.com/goccy/go-json/internal/encoder/vm_color/hack.go b/vendor/github.com/goccy/go-json/internal/encoder/vm_color/hack.go
new file mode 100644
index 00000000..12ec56c5
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/internal/encoder/vm_color/hack.go
@@ -0,0 +1,9 @@
+package vm_color
+
+import (
+	// HACK: compile order
+	// `vm`, `vm_indent`, `vm_color`, `vm_color_indent` packages use a lot of memory to compile,
+	// so forcibly create dependencies between them to avoid compiling them concurrently.
+	// dependency order: vm => vm_indent => vm_color => vm_color_indent
+	_ "github.com/goccy/go-json/internal/encoder/vm_color_indent"
+)
diff --git a/vendor/github.com/goccy/go-json/internal/encoder/vm_color/util.go b/vendor/github.com/goccy/go-json/internal/encoder/vm_color/util.go
new file mode 100644
index 00000000..33f29aee
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/internal/encoder/vm_color/util.go
@@ -0,0 +1,274 @@
+package vm_color
+
+import (
+	"encoding/json"
+	"fmt"
+	"unsafe"
+
+	"github.com/goccy/go-json/internal/encoder"
+	"github.com/goccy/go-json/internal/runtime"
+)
+
+const uintptrSize = 4 << (^uintptr(0) >> 63) // evaluates to 4 when uintptr has 32 bits and to 8 when it has 64
+
+var ( // package-local aliases for helpers exported by the encoder package
+	errUnsupportedValue = encoder.ErrUnsupportedValue
+	errUnsupportedFloat = encoder.ErrUnsupportedFloat
+	mapiterinit         = encoder.MapIterInit
+	mapiterkey          = encoder.MapIterKey
+	mapitervalue        = encoder.MapIterValue
+	mapiternext         = encoder.MapIterNext
+	maplen              = encoder.MapLen
+)
+
+type emptyInterface struct { // two-word layout cast to and from interface{} values in this package (presumably mirrors the runtime layout)
+	typ *runtime.Type
+	ptr unsafe.Pointer
+}
+
+type nonEmptyInterface struct { // layout used to read the dynamic type and data pointer when NonEmptyInterfaceFlags is set on an opcode
+	itab *struct {
+		ityp *runtime.Type // static interface type
+		typ  *runtime.Type // dynamic concrete type
+		// unused fields...
+	}
+	ptr unsafe.Pointer
+}
+
+func errUnimplementedOp(op encoder.OpType) error { // builds the error reported for an opcode that has no case in the VM switch
+	return fmt.Errorf("encoder: opcode %s has not been implemented", op)
+}
+
+func load(base uintptr, idx uint32) uintptr { // returns the uintptr stored at address base+idx (idx is added as-is, i.e. a byte offset)
+	addr := base + uintptr(idx)
+	return **(**uintptr)(unsafe.Pointer(&addr)) // &addr is viewed as **uintptr, so addr itself is dereferenced as a *uintptr
+}
+
+func store(base uintptr, idx uint32, p uintptr) { // writes p into the uintptr located at address base+idx
+	addr := base + uintptr(idx)
+	**(**uintptr)(unsafe.Pointer(&addr)) = p // &addr is viewed as **uintptr, so the assignment lands at the address held in addr
+}
+
+// loadNPtr reads the uintptr stored at base+idx and then follows it through
+// up to ptrNum further pointer loads. It returns 0 as soon as a zero pointer
+// is met while loads remain; with ptrNum == 0 the stored value is returned.
+func loadNPtr(base uintptr, idx uint32, ptrNum uint8) uintptr {
+	addr := base + uintptr(idx)
+	p := **(**uintptr)(unsafe.Pointer(&addr))
+	for n := ptrNum; n > 0 && p != 0; n-- {
+		p = ptrToPtr(p)
+	}
+	return p
+}
+
+func ptrToUint64(p uintptr, bitSize uint8) uint64 { // reads an unsigned integer of bitSize bits at address p and widens it to uint64
+	switch bitSize {
+	case 8:
+		return (uint64)(**(**uint8)(unsafe.Pointer(&p)))
+	case 16:
+		return (uint64)(**(**uint16)(unsafe.Pointer(&p)))
+	case 32:
+		return (uint64)(**(**uint32)(unsafe.Pointer(&p)))
+	case 64:
+		return **(**uint64)(unsafe.Pointer(&p))
+	}
+	return 0 // any bitSize other than 8, 16, 32 or 64 yields 0
+}
+func ptrToFloat32(p uintptr) float32            { return **(**float32)(unsafe.Pointer(&p)) } // this and the next five helpers return a copy of the typed value located at address p
+func ptrToFloat64(p uintptr) float64            { return **(**float64)(unsafe.Pointer(&p)) }
+func ptrToBool(p uintptr) bool                  { return **(**bool)(unsafe.Pointer(&p)) }
+func ptrToBytes(p uintptr) []byte               { return **(**[]byte)(unsafe.Pointer(&p)) }
+func ptrToNumber(p uintptr) json.Number         { return **(**json.Number)(unsafe.Pointer(&p)) }
+func ptrToString(p uintptr) string              { return **(**string)(unsafe.Pointer(&p)) }
+func ptrToSlice(p uintptr) *runtime.SliceHeader { return *(**runtime.SliceHeader)(unsafe.Pointer(&p)) } // single dereference: returns p itself viewed as *runtime.SliceHeader
+func ptrToPtr(p uintptr) uintptr { // returns the pointer value stored at address p
+	return uintptr(**(**unsafe.Pointer)(unsafe.Pointer(&p))) // loaded as an unsafe.Pointer, then converted to uintptr
+}
+// ptrToNPtr follows p through up to ptrNum pointer loads via ptrToPtr.
+// It returns 0 as soon as a zero pointer is met while loads remain;
+// with ptrNum == 0 it returns p unchanged.
+func ptrToNPtr(p uintptr, ptrNum uint8) uintptr {
+	for n := ptrNum; n > 0 && p != 0; n-- {
+		p = ptrToPtr(p)
+	}
+	return p
+}
+
+func ptrToUnsafePtr(p uintptr) unsafe.Pointer { // returns p itself as an unsafe.Pointer; the memory at p is not read
+	return *(*unsafe.Pointer)(unsafe.Pointer(&p)) // reinterprets the bits of p instead of writing the conversion unsafe.Pointer(p)
+}
+func ptrToInterface(code *encoder.Opcode, p uintptr) interface{} { // builds an interface{} whose type word is code.Type and whose data word is p
+	return *(*interface{})(unsafe.Pointer(&emptyInterface{ // the emptyInterface struct is reinterpreted as an interface{}
+		typ: code.Type,
+		ptr: *(*unsafe.Pointer)(unsafe.Pointer(&p)),
+	}))
+}
+
+// appendInt emits encoder.AppendInt's output for p and code between the Int color scheme's header and footer.
+func appendInt(ctx *encoder.RuntimeContext, b []byte, p uintptr, code *encoder.Opcode) []byte {
+	scheme := ctx.Option.ColorScheme.Int
+	colored := encoder.AppendInt(ctx, append(b, scheme.Header...), p, code)
+	return append(colored, scheme.Footer...)
+}
+
+// appendUint emits encoder.AppendUint's output for p and code between the Uint color scheme's header and footer.
+func appendUint(ctx *encoder.RuntimeContext, b []byte, p uintptr, code *encoder.Opcode) []byte {
+	scheme := ctx.Option.ColorScheme.Uint
+	colored := encoder.AppendUint(ctx, append(b, scheme.Header...), p, code)
+	return append(colored, scheme.Footer...)
+}
+
+// appendFloat32 emits encoder.AppendFloat32's output for v between the Float color scheme's header and footer.
+func appendFloat32(ctx *encoder.RuntimeContext, b []byte, v float32) []byte {
+	scheme := ctx.Option.ColorScheme.Float
+	colored := encoder.AppendFloat32(ctx, append(b, scheme.Header...), v)
+	return append(colored, scheme.Footer...)
+}
+
+// appendFloat64 emits encoder.AppendFloat64's output for v between the Float color scheme's header and footer.
+func appendFloat64(ctx *encoder.RuntimeContext, b []byte, v float64) []byte {
+	scheme := ctx.Option.ColorScheme.Float
+	colored := encoder.AppendFloat64(ctx, append(b, scheme.Header...), v)
+	return append(colored, scheme.Footer...)
+}
+
+// appendString emits encoder.AppendString's output for v between the String color scheme's header and footer.
+func appendString(ctx *encoder.RuntimeContext, b []byte, v string) []byte {
+	scheme := ctx.Option.ColorScheme.String
+	colored := encoder.AppendString(ctx, append(b, scheme.Header...), v)
+	return append(colored, scheme.Footer...)
+}
+
+// appendByteSlice emits encoder.AppendByteSlice's output for src between the Binary color scheme's header and footer.
+func appendByteSlice(ctx *encoder.RuntimeContext, b []byte, src []byte) []byte {
+	scheme := ctx.Option.ColorScheme.Binary
+	colored := encoder.AppendByteSlice(ctx, append(b, scheme.Header...), src)
+	return append(colored, scheme.Footer...)
+}
+
+// appendNumber emits n via encoder.AppendNumber inside the Int color scheme's header/footer; on error it returns (nil, err).
+func appendNumber(ctx *encoder.RuntimeContext, b []byte, n json.Number) ([]byte, error) {
+	scheme := ctx.Option.ColorScheme.Int
+	out, err := encoder.AppendNumber(ctx, append(b, scheme.Header...), n)
+	if err != nil {
+		return nil, err
+	}
+	return append(out, scheme.Footer...), nil
+}
+
+// appendBool emits the literal "true" or "false" for v between the Bool color scheme's header and footer.
+func appendBool(ctx *encoder.RuntimeContext, b []byte, v bool) []byte {
+	scheme := ctx.Option.ColorScheme.Bool
+	text := "false"
+	if v {
+		text = "true"
+	}
+	b = append(append(b, scheme.Header...), text...)
+	return append(b, scheme.Footer...)
+}
+
+// appendNull emits the literal "null" between the Null color scheme's header and footer.
+func appendNull(ctx *encoder.RuntimeContext, b []byte) []byte {
+	scheme := ctx.Option.ColorScheme.Null
+	b = append(append(b, scheme.Header...), "null"...)
+	return append(b, scheme.Footer...)
+}
+
+func appendComma(_ *encoder.RuntimeContext, b []byte) []byte { // appends a single ','; the context argument is unused
+	return append(b, ',')
+}
+
+// appendNullComma emits "null" between the Null color scheme's header and footer, followed by ','.
+func appendNullComma(ctx *encoder.RuntimeContext, b []byte) []byte {
+	scheme := ctx.Option.ColorScheme.Null
+	b = append(append(b, scheme.Header...), "null"...)
+	return append(append(b, scheme.Footer...), ',')
+}
+
+// appendColon overwrites the final byte of b with ':' and returns b; b must be non-empty.
+func appendColon(_ *encoder.RuntimeContext, b []byte) []byte {
+	b[len(b)-1] = ':'
+	return b
+}
+
+// appendMapKeyValue appends key without its final byte, then ':', then value; key must be non-empty.
+func appendMapKeyValue(_ *encoder.RuntimeContext, _ *encoder.Opcode, b, key, value []byte) []byte {
+	b = append(append(b, key[:len(key)-1]...), ':')
+	return append(b, value...)
+}
+
+// appendMapEnd overwrites the final byte of b with '}' and then appends ','.
+// b must be non-empty.
+func appendMapEnd(_ *encoder.RuntimeContext, _ *encoder.Opcode, b []byte) []byte {
+	b[len(b)-1] = '}'
+	return append(b, ',')
+}
+
+func appendMarshalJSON(ctx *encoder.RuntimeContext, code *encoder.Opcode, b []byte, v interface{}) ([]byte, error) {
+	return encoder.AppendMarshalJSON(ctx, code, b, v) // forwarded unchanged: no color header/footer is added here
+}
+
+// appendMarshalText emits encoder.AppendMarshalText's output for v inside the String color scheme's header/footer; on error it returns (nil, err).
+func appendMarshalText(ctx *encoder.RuntimeContext, code *encoder.Opcode, b []byte, v interface{}) ([]byte, error) {
+	scheme := ctx.Option.ColorScheme.String
+	out, err := encoder.AppendMarshalText(ctx, code, append(b, scheme.Header...), v)
+	if err != nil {
+		return nil, err
+	}
+	return append(out, scheme.Footer...), nil
+}
+
+func appendArrayHead(_ *encoder.RuntimeContext, _ *encoder.Opcode, b []byte) []byte { // appends '['; context and opcode are unused
+	return append(b, '[')
+}
+
+// appendArrayEnd overwrites the final byte of b with ']' and then appends ','; b must be non-empty.
+func appendArrayEnd(_ *encoder.RuntimeContext, _ *encoder.Opcode, b []byte) []byte {
+	b[len(b)-1] = ']'
+	return append(b, ',')
+}
+
+func appendEmptyArray(_ *encoder.RuntimeContext, b []byte) []byte { // appends the three bytes "[],"; the context is unused
+	return append(b, '[', ']', ',')
+}
+
+func appendEmptyObject(_ *encoder.RuntimeContext, b []byte) []byte { // appends the three bytes "{},"; the context is unused
+	return append(b, '{', '}', ',')
+}
+
+// appendObjectEnd overwrites the final byte of b with '}' and then appends ','; b must be non-empty.
+func appendObjectEnd(_ *encoder.RuntimeContext, _ *encoder.Opcode, b []byte) []byte {
+	b[len(b)-1] = '}'
+	return append(b, ',')
+}
+
+func appendStructHead(_ *encoder.RuntimeContext, b []byte) []byte { // appends '{'; the context is unused
+	return append(b, '{')
+}
+
+// appendStructKey emits code.Key without its final byte between the
+// ObjectKey color scheme's header and footer, and then appends ':'.
+// code.Key must be non-empty.
+func appendStructKey(ctx *encoder.RuntimeContext, code *encoder.Opcode, b []byte) []byte {
+	scheme := ctx.Option.ColorScheme.ObjectKey
+	b = append(append(b, scheme.Header...), code.Key[:len(code.Key)-1]...)
+	return append(append(b, scheme.Footer...), ':')
+}
+
+func appendStructEnd(_ *encoder.RuntimeContext, _ *encoder.Opcode, b []byte) []byte { // appends the two bytes "},"; context and opcode are unused
+	return append(b, '}', ',')
+}
+
+// appendStructEndSkipLast turns a trailing ',' in b into '}' and appends ','; otherwise it defers to appendStructEnd. b must be non-empty.
+func appendStructEndSkipLast(ctx *encoder.RuntimeContext, code *encoder.Opcode, b []byte) []byte {
+	if end := len(b) - 1; b[end] == ',' {
+		b[end] = '}'
+		return appendComma(ctx, b)
+	}
+	return appendStructEnd(ctx, code, b)
+}
+
+func restoreIndent(_ *encoder.RuntimeContext, _ *encoder.Opcode, _ uintptr)               {} // the four indent helpers here are no-ops: empty bodies, or b returned unchanged
+func storeIndent(_ uintptr, _ *encoder.Opcode, _ uintptr)                                 {}
+func appendMapKeyIndent(_ *encoder.RuntimeContext, _ *encoder.Opcode, b []byte) []byte    { return b }
+func appendArrayElemIndent(_ *encoder.RuntimeContext, _ *encoder.Opcode, b []byte) []byte { return b }
diff --git a/vendor/github.com/goccy/go-json/internal/encoder/vm_color/vm.go b/vendor/github.com/goccy/go-json/internal/encoder/vm_color/vm.go
new file mode 100644
index 00000000..a63e83e5
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/internal/encoder/vm_color/vm.go
@@ -0,0 +1,4859 @@
+// Code generated by internal/cmd/generator. DO NOT EDIT!
+package vm_color
+
+import (
+	"math"
+	"reflect"
+	"sort"
+	"unsafe"
+
+	"github.com/goccy/go-json/internal/encoder"
+	"github.com/goccy/go-json/internal/runtime"
+)
+
+func Run(ctx *encoder.RuntimeContext, b []byte, codeSet *encoder.OpcodeSet) ([]byte, error) {
+	recursiveLevel := 0
+	ptrOffset := uintptr(0)
+	ctxptr := ctx.Ptr()
+	var code *encoder.Opcode
+	if (ctx.Option.Flag & encoder.HTMLEscapeOption) != 0 {
+		code = codeSet.EscapeKeyCode
+	} else {
+		code = codeSet.NoescapeKeyCode
+	}
+
+	for {
+		switch code.Op {
+		default:
+			return nil, errUnimplementedOp(code.Op)
+		case encoder.OpPtr:
+			p := load(ctxptr, code.Idx)
+			code = code.Next
+			store(ctxptr, code.Idx, ptrToPtr(p))
+		case encoder.OpIntPtr:
+			p := loadNPtr(ctxptr, code.Idx, code.PtrNum)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.Next
+				break
+			}
+			store(ctxptr, code.Idx, p)
+			fallthrough
+		case encoder.OpInt:
+			b = appendInt(ctx, b, load(ctxptr, code.Idx), code)
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpUintPtr:
+			p := loadNPtr(ctxptr, code.Idx, code.PtrNum)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.Next
+				break
+			}
+			store(ctxptr, code.Idx, p)
+			fallthrough
+		case encoder.OpUint:
+			b = appendUint(ctx, b, load(ctxptr, code.Idx), code)
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpIntString:
+			b = append(b, '"')
+			b = appendInt(ctx, b, load(ctxptr, code.Idx), code)
+			b = append(b, '"')
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpUintString:
+			b = append(b, '"')
+			b = appendUint(ctx, b, load(ctxptr, code.Idx), code)
+			b = append(b, '"')
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpFloat32Ptr:
+			p := loadNPtr(ctxptr, code.Idx, code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+				b = appendComma(ctx, b)
+				code = code.Next
+				break
+			}
+			store(ctxptr, code.Idx, p)
+			fallthrough
+		case encoder.OpFloat32:
+			b = appendFloat32(ctx, b, ptrToFloat32(load(ctxptr, code.Idx)))
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpFloat64Ptr:
+			p := loadNPtr(ctxptr, code.Idx, code.PtrNum)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.Next
+				break
+			}
+			store(ctxptr, code.Idx, p)
+			fallthrough
+		case encoder.OpFloat64:
+			v := ptrToFloat64(load(ctxptr, code.Idx))
+			if math.IsInf(v, 0) || math.IsNaN(v) {
+				return nil, errUnsupportedFloat(v)
+			}
+			b = appendFloat64(ctx, b, v)
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStringPtr:
+			p := loadNPtr(ctxptr, code.Idx, code.PtrNum)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.Next
+				break
+			}
+			store(ctxptr, code.Idx, p)
+			fallthrough
+		case encoder.OpString:
+			b = appendString(ctx, b, ptrToString(load(ctxptr, code.Idx)))
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpBoolPtr:
+			p := loadNPtr(ctxptr, code.Idx, code.PtrNum)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.Next
+				break
+			}
+			store(ctxptr, code.Idx, p)
+			fallthrough
+		case encoder.OpBool:
+			b = appendBool(ctx, b, ptrToBool(load(ctxptr, code.Idx)))
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpBytesPtr:
+			p := loadNPtr(ctxptr, code.Idx, code.PtrNum)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.Next
+				break
+			}
+			store(ctxptr, code.Idx, p)
+			fallthrough
+		case encoder.OpBytes:
+			b = appendByteSlice(ctx, b, ptrToBytes(load(ctxptr, code.Idx)))
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpNumberPtr:
+			p := loadNPtr(ctxptr, code.Idx, code.PtrNum)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.Next
+				break
+			}
+			store(ctxptr, code.Idx, p)
+			fallthrough
+		case encoder.OpNumber:
+			bb, err := appendNumber(ctx, b, ptrToNumber(load(ctxptr, code.Idx)))
+			if err != nil {
+				return nil, err
+			}
+			b = appendComma(ctx, bb)
+			code = code.Next
+		case encoder.OpInterfacePtr:
+			p := loadNPtr(ctxptr, code.Idx, code.PtrNum)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.Next
+				break
+			}
+			store(ctxptr, code.Idx, p)
+			fallthrough
+		case encoder.OpInterface:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.Next
+				break
+			}
+			if recursiveLevel > encoder.StartDetectingCyclesAfter {
+				for _, seen := range ctx.SeenPtr {
+					if p == seen {
+						return nil, errUnsupportedValue(code, p)
+					}
+				}
+			}
+			ctx.SeenPtr = append(ctx.SeenPtr, p)
+			var (
+				typ      *runtime.Type
+				ifacePtr unsafe.Pointer
+			)
+			up := ptrToUnsafePtr(p)
+			if code.Flags&encoder.NonEmptyInterfaceFlags != 0 {
+				iface := (*nonEmptyInterface)(up)
+				ifacePtr = iface.ptr
+				if iface.itab != nil {
+					typ = iface.itab.typ
+				}
+			} else {
+				iface := (*emptyInterface)(up)
+				ifacePtr = iface.ptr
+				typ = iface.typ
+			}
+			if ifacePtr == nil {
+				isDirectedNil := typ != nil && typ.Kind() == reflect.Struct && !runtime.IfaceIndir(typ)
+				if !isDirectedNil {
+					b = appendNullComma(ctx, b)
+					code = code.Next
+					break
+				}
+			}
+			ctx.KeepRefs = append(ctx.KeepRefs, up)
+			ifaceCodeSet, err := encoder.CompileToGetCodeSet(ctx, uintptr(unsafe.Pointer(typ)))
+			if err != nil {
+				return nil, err
+			}
+
+			totalLength := uintptr(code.Length) + 3
+			nextTotalLength := uintptr(ifaceCodeSet.CodeLength) + 3
+
+			var c *encoder.Opcode
+			if (ctx.Option.Flag & encoder.HTMLEscapeOption) != 0 {
+				c = ifaceCodeSet.InterfaceEscapeKeyCode
+			} else {
+				c = ifaceCodeSet.InterfaceNoescapeKeyCode
+			}
+			curlen := uintptr(len(ctx.Ptrs))
+			offsetNum := ptrOffset / uintptrSize
+			oldOffset := ptrOffset
+			ptrOffset += totalLength * uintptrSize
+			oldBaseIndent := ctx.BaseIndent
+			ctx.BaseIndent += code.Indent
+
+			newLen := offsetNum + totalLength + nextTotalLength
+			if curlen < newLen {
+				ctx.Ptrs = append(ctx.Ptrs, make([]uintptr, newLen-curlen)...)
+			}
+			ctxptr = ctx.Ptr() + ptrOffset // assign new ctxptr
+
+			end := ifaceCodeSet.EndCode
+			store(ctxptr, c.Idx, uintptr(ifacePtr))
+			store(ctxptr, end.Idx, oldOffset)
+			store(ctxptr, end.ElemIdx, uintptr(unsafe.Pointer(code.Next)))
+			storeIndent(ctxptr, end, uintptr(oldBaseIndent))
+			code = c
+			recursiveLevel++
+		case encoder.OpInterfaceEnd:
+			recursiveLevel--
+
+			// restore ctxptr
+			offset := load(ctxptr, code.Idx)
+			restoreIndent(ctx, code, ctxptr)
+			ctx.SeenPtr = ctx.SeenPtr[:len(ctx.SeenPtr)-1]
+
+			codePtr := load(ctxptr, code.ElemIdx)
+			code = (*encoder.Opcode)(ptrToUnsafePtr(codePtr))
+			ctxptr = ctx.Ptr() + offset
+			ptrOffset = offset
+		case encoder.OpMarshalJSONPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToPtr(p))
+			fallthrough
+		case encoder.OpMarshalJSON:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.Next
+				break
+			}
+			if (code.Flags&encoder.IsNilableTypeFlags) != 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				p = ptrToPtr(p)
+			}
+			bb, err := appendMarshalJSON(ctx, code, b, ptrToInterface(code, p))
+			if err != nil {
+				return nil, err
+			}
+			b = appendComma(ctx, bb)
+			code = code.Next
+		case encoder.OpMarshalTextPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToPtr(p))
+			fallthrough
+		case encoder.OpMarshalText:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				b = append(b, `""`...)
+				b = appendComma(ctx, b)
+				code = code.Next
+				break
+			}
+			if (code.Flags&encoder.IsNilableTypeFlags) != 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				p = ptrToPtr(p)
+			}
+			bb, err := appendMarshalText(ctx, code, b, ptrToInterface(code, p))
+			if err != nil {
+				return nil, err
+			}
+			b = appendComma(ctx, bb)
+			code = code.Next
+		case encoder.OpSlicePtr:
+			p := loadNPtr(ctxptr, code.Idx, code.PtrNum)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, p)
+			fallthrough
+		case encoder.OpSlice:
+			p := load(ctxptr, code.Idx)
+			slice := ptrToSlice(p)
+			if p == 0 || slice.Data == nil {
+				b = appendNullComma(ctx, b)
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.ElemIdx, 0)
+			store(ctxptr, code.Length, uintptr(slice.Len))
+			store(ctxptr, code.Idx, uintptr(slice.Data))
+			if slice.Len > 0 {
+				b = appendArrayHead(ctx, code, b)
+				code = code.Next
+				store(ctxptr, code.Idx, uintptr(slice.Data))
+			} else {
+				b = appendEmptyArray(ctx, b)
+				code = code.End.Next
+			}
+		case encoder.OpSliceElem:
+			idx := load(ctxptr, code.ElemIdx)
+			length := load(ctxptr, code.Length)
+			idx++
+			if idx < length {
+				b = appendArrayElemIndent(ctx, code, b)
+				store(ctxptr, code.ElemIdx, idx)
+				data := load(ctxptr, code.Idx)
+				size := uintptr(code.Size)
+				code = code.Next
+				store(ctxptr, code.Idx, data+idx*size)
+			} else {
+				b = appendArrayEnd(ctx, code, b)
+				code = code.End.Next
+			}
+		case encoder.OpArrayPtr:
+			p := loadNPtr(ctxptr, code.Idx, code.PtrNum)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, p)
+			fallthrough
+		case encoder.OpArray:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.End.Next
+				break
+			}
+			if code.Length > 0 {
+				b = appendArrayHead(ctx, code, b)
+				store(ctxptr, code.ElemIdx, 0)
+				code = code.Next
+				store(ctxptr, code.Idx, p)
+			} else {
+				b = appendEmptyArray(ctx, b)
+				code = code.End.Next
+			}
+		case encoder.OpArrayElem:
+			idx := load(ctxptr, code.ElemIdx)
+			idx++
+			if idx < uintptr(code.Length) {
+				b = appendArrayElemIndent(ctx, code, b)
+				store(ctxptr, code.ElemIdx, idx)
+				p := load(ctxptr, code.Idx)
+				size := uintptr(code.Size)
+				code = code.Next
+				store(ctxptr, code.Idx, p+idx*size)
+			} else {
+				b = appendArrayEnd(ctx, code, b)
+				code = code.End.Next
+			}
+		case encoder.OpMapPtr:
+			p := loadNPtr(ctxptr, code.Idx, code.PtrNum)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, p)
+			fallthrough
+		case encoder.OpMap:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.End.Next
+				break
+			}
+			uptr := ptrToUnsafePtr(p)
+			mlen := maplen(uptr)
+			if mlen <= 0 {
+				b = appendEmptyObject(ctx, b)
+				code = code.End.Next
+				break
+			}
+			b = appendStructHead(ctx, b)
+			unorderedMap := (ctx.Option.Flag & encoder.UnorderedMapOption) != 0
+			mapCtx := encoder.NewMapContext(mlen, unorderedMap)
+			mapiterinit(code.Type, uptr, &mapCtx.Iter)
+			store(ctxptr, code.Idx, uintptr(unsafe.Pointer(mapCtx)))
+			ctx.KeepRefs = append(ctx.KeepRefs, unsafe.Pointer(mapCtx))
+			if unorderedMap {
+				b = appendMapKeyIndent(ctx, code.Next, b)
+			} else {
+				mapCtx.Start = len(b)
+				mapCtx.First = len(b)
+			}
+			key := mapiterkey(&mapCtx.Iter)
+			store(ctxptr, code.Next.Idx, uintptr(key))
+			code = code.Next
+		case encoder.OpMapKey:
+			mapCtx := (*encoder.MapContext)(ptrToUnsafePtr(load(ctxptr, code.Idx)))
+			idx := mapCtx.Idx
+			idx++
+			if (ctx.Option.Flag & encoder.UnorderedMapOption) != 0 {
+				if idx < mapCtx.Len {
+					b = appendMapKeyIndent(ctx, code, b)
+					mapCtx.Idx = int(idx)
+					key := mapiterkey(&mapCtx.Iter)
+					store(ctxptr, code.Next.Idx, uintptr(key))
+					code = code.Next
+				} else {
+					b = appendObjectEnd(ctx, code, b)
+					encoder.ReleaseMapContext(mapCtx)
+					code = code.End.Next
+				}
+			} else {
+				mapCtx.Slice.Items[mapCtx.Idx].Value = b[mapCtx.Start:len(b)]
+				if idx < mapCtx.Len {
+					mapCtx.Idx = int(idx)
+					mapCtx.Start = len(b)
+					key := mapiterkey(&mapCtx.Iter)
+					store(ctxptr, code.Next.Idx, uintptr(key))
+					code = code.Next
+				} else {
+					code = code.End
+				}
+			}
+		case encoder.OpMapValue:
+			mapCtx := (*encoder.MapContext)(ptrToUnsafePtr(load(ctxptr, code.Idx)))
+			if (ctx.Option.Flag & encoder.UnorderedMapOption) != 0 {
+				b = appendColon(ctx, b)
+			} else {
+				mapCtx.Slice.Items[mapCtx.Idx].Key = b[mapCtx.Start:len(b)]
+				mapCtx.Start = len(b)
+			}
+			value := mapitervalue(&mapCtx.Iter)
+			store(ctxptr, code.Next.Idx, uintptr(value))
+			mapiternext(&mapCtx.Iter)
+			code = code.Next
+		case encoder.OpMapEnd:
+			// this operation only used by sorted map.
+			mapCtx := (*encoder.MapContext)(ptrToUnsafePtr(load(ctxptr, code.Idx)))
+			sort.Sort(mapCtx.Slice)
+			buf := mapCtx.Buf
+			for _, item := range mapCtx.Slice.Items {
+				buf = appendMapKeyValue(ctx, code, buf, item.Key, item.Value)
+			}
+			buf = appendMapEnd(ctx, code, buf)
+			b = b[:mapCtx.First]
+			b = append(b, buf...)
+			mapCtx.Buf = buf
+			encoder.ReleaseMapContext(mapCtx)
+			code = code.Next
+		case encoder.OpRecursivePtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				code = code.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpRecursive:
+			ptr := load(ctxptr, code.Idx)
+			if ptr != 0 {
+				if recursiveLevel > encoder.StartDetectingCyclesAfter {
+					for _, seen := range ctx.SeenPtr {
+						if ptr == seen {
+							return nil, errUnsupportedValue(code, ptr)
+						}
+					}
+				}
+			}
+			ctx.SeenPtr = append(ctx.SeenPtr, ptr)
+			c := code.Jmp.Code
+			curlen := uintptr(len(ctx.Ptrs))
+			offsetNum := ptrOffset / uintptrSize
+			oldOffset := ptrOffset
+			ptrOffset += code.Jmp.CurLen * uintptrSize
+			oldBaseIndent := ctx.BaseIndent
+			indentDiffFromTop := c.Indent - 1
+			ctx.BaseIndent += code.Indent - indentDiffFromTop
+
+			newLen := offsetNum + code.Jmp.CurLen + code.Jmp.NextLen
+			if curlen < newLen {
+				ctx.Ptrs = append(ctx.Ptrs, make([]uintptr, newLen-curlen)...)
+			}
+			ctxptr = ctx.Ptr() + ptrOffset // assign new ctxptr
+
+			store(ctxptr, c.Idx, ptr)
+			store(ctxptr, c.End.Next.Idx, oldOffset)
+			store(ctxptr, c.End.Next.ElemIdx, uintptr(unsafe.Pointer(code.Next)))
+			storeIndent(ctxptr, c.End.Next, uintptr(oldBaseIndent))
+			code = c
+			recursiveLevel++
+		case encoder.OpRecursiveEnd:
+			recursiveLevel--
+
+			// restore ctxptr
+			restoreIndent(ctx, code, ctxptr)
+			offset := load(ctxptr, code.Idx)
+			ctx.SeenPtr = ctx.SeenPtr[:len(ctx.SeenPtr)-1]
+
+			codePtr := load(ctxptr, code.ElemIdx)
+			code = (*encoder.Opcode)(ptrToUnsafePtr(codePtr))
+			ctxptr = ctx.Ptr() + offset
+			ptrOffset = offset
+		case encoder.OpStructPtrHead:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHead:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && ((code.Flags&encoder.IndirectFlags) != 0 || code.Next.Op == encoder.OpStructEnd) {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if len(code.Key) > 0 {
+				if (code.Flags&encoder.IsTaggedKeyFlags) != 0 || code.Flags&encoder.AnonymousKeyFlags == 0 {
+					b = appendStructKey(ctx, code, b)
+				}
+			}
+			p += uintptr(code.Offset)
+			code = code.Next
+			store(ctxptr, code.Idx, p)
+		case encoder.OpStructPtrHeadOmitEmpty:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmpty:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && ((code.Flags&encoder.IndirectFlags) != 0 || code.Next.Op == encoder.OpStructEnd) {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			p += uintptr(code.Offset)
+			if p == 0 || (ptrToPtr(p) == 0 && (code.Flags&encoder.IsNextOpPtrTypeFlags) != 0) {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				code = code.Next
+				store(ctxptr, code.Idx, p)
+			}
+		case encoder.OpStructPtrHeadInt:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadInt:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = appendInt(ctx, b, p+uintptr(code.Offset), code)
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyInt:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyInt:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			u64 := ptrToUint64(p+uintptr(code.Offset), code.NumBitSize)
+			v := u64 & ((1 << code.NumBitSize) - 1)
+			if v == 0 {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				b = appendInt(ctx, b, p+uintptr(code.Offset), code)
+				b = appendComma(ctx, b)
+				code = code.Next
+			}
+		case encoder.OpStructPtrHeadIntString:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadIntString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			b = appendInt(ctx, b, p+uintptr(code.Offset), code)
+			b = append(b, '"')
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyIntString:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyIntString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			p += uintptr(code.Offset)
+			u64 := ptrToUint64(p, code.NumBitSize)
+			v := u64 & ((1 << code.NumBitSize) - 1)
+			if v == 0 {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendInt(ctx, b, p, code)
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+				code = code.Next
+			}
+		case encoder.OpStructPtrHeadIntPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadIntPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendInt(ctx, b, p, code)
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyIntPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyIntPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendInt(ctx, b, p, code)
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructPtrHeadIntPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadIntPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = append(b, '"')
+				b = appendInt(ctx, b, p, code)
+				b = append(b, '"')
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyIntPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyIntPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendInt(ctx, b, p, code)
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructPtrHeadUint:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadUint:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = appendUint(ctx, b, p+uintptr(code.Offset), code)
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyUint:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyUint:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			u64 := ptrToUint64(p+uintptr(code.Offset), code.NumBitSize)
+			v := u64 & ((1 << code.NumBitSize) - 1)
+			if v == 0 {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				b = appendUint(ctx, b, p+uintptr(code.Offset), code)
+				b = appendComma(ctx, b)
+				code = code.Next
+			}
+		case encoder.OpStructPtrHeadUintString:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadUintString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			b = appendUint(ctx, b, p+uintptr(code.Offset), code)
+			b = append(b, '"')
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyUintString:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyUintString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			u64 := ptrToUint64(p+uintptr(code.Offset), code.NumBitSize)
+			v := u64 & ((1 << code.NumBitSize) - 1)
+			if v == 0 {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendUint(ctx, b, p+uintptr(code.Offset), code)
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+				code = code.Next
+			}
+		case encoder.OpStructPtrHeadUintPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadUintPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendUint(ctx, b, p, code)
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyUintPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyUintPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendUint(ctx, b, p, code)
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructPtrHeadUintPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadUintPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = append(b, '"')
+				b = appendUint(ctx, b, p, code)
+				b = append(b, '"')
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyUintPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyUintPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendUint(ctx, b, p, code)
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructPtrHeadFloat32:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadFloat32:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = appendFloat32(ctx, b, ptrToFloat32(p+uintptr(code.Offset)))
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyFloat32:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyFloat32:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			v := ptrToFloat32(p + uintptr(code.Offset))
+			if v == 0 {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				b = appendFloat32(ctx, b, v)
+				b = appendComma(ctx, b)
+				code = code.Next
+			}
+		case encoder.OpStructPtrHeadFloat32String:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadFloat32String:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			b = appendFloat32(ctx, b, ptrToFloat32(p+uintptr(code.Offset)))
+			b = append(b, '"')
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyFloat32String:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyFloat32String:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			v := ptrToFloat32(p + uintptr(code.Offset))
+			if v == 0 {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendFloat32(ctx, b, v)
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+				code = code.Next
+			}
+		case encoder.OpStructPtrHeadFloat32Ptr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadFloat32Ptr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendFloat32(ctx, b, ptrToFloat32(p))
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyFloat32Ptr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyFloat32Ptr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendFloat32(ctx, b, ptrToFloat32(p))
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructPtrHeadFloat32PtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadFloat32PtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = append(b, '"')
+				b = appendFloat32(ctx, b, ptrToFloat32(p))
+				b = append(b, '"')
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyFloat32PtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyFloat32PtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendFloat32(ctx, b, ptrToFloat32(p))
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructPtrHeadFloat64:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadFloat64:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			v := ptrToFloat64(p + uintptr(code.Offset))
+			if math.IsInf(v, 0) || math.IsNaN(v) {
+				return nil, errUnsupportedFloat(v)
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = appendFloat64(ctx, b, v)
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyFloat64:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyFloat64:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			v := ptrToFloat64(p + uintptr(code.Offset))
+			if v == 0 {
+				code = code.NextField
+			} else {
+				if math.IsInf(v, 0) || math.IsNaN(v) {
+					return nil, errUnsupportedFloat(v)
+				}
+				b = appendStructKey(ctx, code, b)
+				b = appendFloat64(ctx, b, v)
+				b = appendComma(ctx, b)
+				code = code.Next
+			}
+		case encoder.OpStructPtrHeadFloat64String:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadFloat64String:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			v := ptrToFloat64(p + uintptr(code.Offset))
+			if math.IsInf(v, 0) || math.IsNaN(v) {
+				return nil, errUnsupportedFloat(v)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			b = appendFloat64(ctx, b, v)
+			b = append(b, '"')
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyFloat64String:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyFloat64String:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			v := ptrToFloat64(p + uintptr(code.Offset))
+			if v == 0 {
+				code = code.NextField
+			} else {
+				if math.IsInf(v, 0) || math.IsNaN(v) {
+					return nil, errUnsupportedFloat(v)
+				}
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendFloat64(ctx, b, v)
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+				code = code.Next
+			}
+		case encoder.OpStructPtrHeadFloat64Ptr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadFloat64Ptr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				v := ptrToFloat64(p)
+				if math.IsInf(v, 0) || math.IsNaN(v) {
+					return nil, errUnsupportedFloat(v)
+				}
+				b = appendFloat64(ctx, b, v)
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyFloat64Ptr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyFloat64Ptr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				v := ptrToFloat64(p)
+				if math.IsInf(v, 0) || math.IsNaN(v) {
+					return nil, errUnsupportedFloat(v)
+				}
+				b = appendFloat64(ctx, b, v)
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructPtrHeadFloat64PtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadFloat64PtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = append(b, '"')
+				v := ptrToFloat64(p)
+				if math.IsInf(v, 0) || math.IsNaN(v) {
+					return nil, errUnsupportedFloat(v)
+				}
+				b = appendFloat64(ctx, b, v)
+				b = append(b, '"')
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyFloat64PtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyFloat64PtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				v := ptrToFloat64(p)
+				if math.IsInf(v, 0) || math.IsNaN(v) {
+					return nil, errUnsupportedFloat(v)
+				}
+				b = appendFloat64(ctx, b, v)
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructPtrHeadString:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNull(ctx, b)
+					b = appendComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = appendString(ctx, b, ptrToString(p+uintptr(code.Offset)))
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyString:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			v := ptrToString(p + uintptr(code.Offset))
+			if v == "" {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				b = appendString(ctx, b, v)
+				b = appendComma(ctx, b)
+				code = code.Next
+			}
+		case encoder.OpStructPtrHeadStringString:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadStringString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = appendString(ctx, b, string(appendString(ctx, []byte{}, ptrToString(p+uintptr(code.Offset)))))
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyStringString:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyStringString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			v := ptrToString(p + uintptr(code.Offset))
+			if v == "" {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				b = appendString(ctx, b, string(appendString(ctx, []byte{}, v)))
+				b = appendComma(ctx, b)
+				code = code.Next
+			}
+		case encoder.OpStructPtrHeadStringPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadStringPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendString(ctx, b, ptrToString(p))
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyStringPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyStringPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendString(ctx, b, ptrToString(p))
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructPtrHeadStringPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadStringPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendString(ctx, b, string(appendString(ctx, []byte{}, ptrToString(p))))
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyStringPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyStringPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendString(ctx, b, string(appendString(ctx, []byte{}, ptrToString(p))))
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructPtrHeadBool:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadBool:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = appendBool(ctx, b, ptrToBool(p+uintptr(code.Offset)))
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyBool:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyBool:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			v := ptrToBool(p + uintptr(code.Offset))
+			if v {
+				b = appendStructKey(ctx, code, b)
+				b = appendBool(ctx, b, v)
+				b = appendComma(ctx, b)
+				code = code.Next
+			} else {
+				code = code.NextField
+			}
+		case encoder.OpStructPtrHeadBoolString:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadBoolString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			b = appendBool(ctx, b, ptrToBool(p+uintptr(code.Offset)))
+			b = append(b, '"')
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyBoolString:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyBoolString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			v := ptrToBool(p + uintptr(code.Offset))
+			if v {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendBool(ctx, b, v)
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+				code = code.Next
+			} else {
+				code = code.NextField
+			}
+		case encoder.OpStructPtrHeadBoolPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadBoolPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendBool(ctx, b, ptrToBool(p))
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyBoolPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyBoolPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendBool(ctx, b, ptrToBool(p))
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructPtrHeadBoolPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadBoolPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = append(b, '"')
+				b = appendBool(ctx, b, ptrToBool(p))
+				b = append(b, '"')
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyBoolPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyBoolPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendBool(ctx, b, ptrToBool(p))
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructPtrHeadBytes:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadBytes:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = appendByteSlice(ctx, b, ptrToBytes(p+uintptr(code.Offset)))
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyBytes:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyBytes:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			v := ptrToBytes(p + uintptr(code.Offset))
+			if len(v) == 0 {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				b = appendByteSlice(ctx, b, v)
+				b = appendComma(ctx, b)
+				code = code.Next
+			}
+		case encoder.OpStructPtrHeadBytesPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadBytesPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendByteSlice(ctx, b, ptrToBytes(p))
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyBytesPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyBytesPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendByteSlice(ctx, b, ptrToBytes(p))
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructPtrHeadNumber:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadNumber:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			bb, err := appendNumber(ctx, b, ptrToNumber(p+uintptr(code.Offset)))
+			if err != nil {
+				return nil, err
+			}
+			b = appendComma(ctx, bb)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyNumber:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyNumber:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			v := ptrToNumber(p + uintptr(code.Offset))
+			if v == "" {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				bb, err := appendNumber(ctx, b, v)
+				if err != nil {
+					return nil, err
+				}
+				b = appendComma(ctx, bb)
+				code = code.Next
+			}
+		case encoder.OpStructPtrHeadNumberString:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadNumberString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			bb, err := appendNumber(ctx, b, ptrToNumber(p+uintptr(code.Offset)))
+			if err != nil {
+				return nil, err
+			}
+			b = append(bb, '"')
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyNumberString:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyNumberString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			v := ptrToNumber(p + uintptr(code.Offset))
+			if v == "" {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				bb, err := appendNumber(ctx, b, v)
+				if err != nil {
+					return nil, err
+				}
+				b = append(bb, '"')
+				b = appendComma(ctx, b)
+				code = code.Next
+			}
+		case encoder.OpStructPtrHeadNumberPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadNumberPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				bb, err := appendNumber(ctx, b, ptrToNumber(p))
+				if err != nil {
+					return nil, err
+				}
+				b = bb
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyNumberPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyNumberPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				bb, err := appendNumber(ctx, b, ptrToNumber(p))
+				if err != nil {
+					return nil, err
+				}
+				b = appendComma(ctx, bb)
+			}
+			code = code.Next
+		case encoder.OpStructPtrHeadNumberPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadNumberPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = append(b, '"')
+				bb, err := appendNumber(ctx, b, ptrToNumber(p))
+				if err != nil {
+					return nil, err
+				}
+				b = append(bb, '"')
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyNumberPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyNumberPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				bb, err := appendNumber(ctx, b, ptrToNumber(p))
+				if err != nil {
+					return nil, err
+				}
+				b = append(bb, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructPtrHeadArray, encoder.OpStructPtrHeadSlice:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadArray, encoder.OpStructHeadSlice:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			p += uintptr(code.Offset)
+			code = code.Next
+			store(ctxptr, code.Idx, p)
+		case encoder.OpStructPtrHeadOmitEmptyArray:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyArray:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			p += uintptr(code.Offset)
+			b = appendStructKey(ctx, code, b)
+			code = code.Next
+			store(ctxptr, code.Idx, p)
+		case encoder.OpStructPtrHeadOmitEmptySlice:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptySlice:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			p += uintptr(code.Offset)
+			slice := ptrToSlice(p)
+			if slice.Len == 0 {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				code = code.Next
+				store(ctxptr, code.Idx, p)
+			}
+		case encoder.OpStructPtrHeadArrayPtr, encoder.OpStructPtrHeadSlicePtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadArrayPtr, encoder.OpStructHeadSlicePtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.NextField
+			} else {
+				code = code.Next
+				store(ctxptr, code.Idx, p)
+			}
+		case encoder.OpStructPtrHeadOmitEmptyArrayPtr, encoder.OpStructPtrHeadOmitEmptySlicePtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyArrayPtr, encoder.OpStructHeadOmitEmptySlicePtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				code = code.Next
+				store(ctxptr, code.Idx, p)
+			}
+		case encoder.OpStructPtrHeadMap:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadMap:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if p != 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				p = ptrToPtr(p + uintptr(code.Offset))
+			}
+			code = code.Next
+			store(ctxptr, code.Idx, p)
+		case encoder.OpStructPtrHeadOmitEmptyMap:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyMap:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if p != 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				p = ptrToPtr(p + uintptr(code.Offset))
+			}
+			if maplen(ptrToUnsafePtr(p)) == 0 {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				code = code.Next
+				store(ctxptr, code.Idx, p)
+			}
+		case encoder.OpStructPtrHeadMapPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadMapPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.NextField
+				break
+			}
+			p = ptrToPtr(p + uintptr(code.Offset))
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.NextField
+			} else {
+				if (code.Flags & encoder.IndirectFlags) != 0 {
+					p = ptrToNPtr(p, code.PtrNum)
+				}
+				code = code.Next
+				store(ctxptr, code.Idx, p)
+			}
+		case encoder.OpStructPtrHeadOmitEmptyMapPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyMapPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if p == 0 {
+				code = code.NextField
+				break
+			}
+			p = ptrToPtr(p + uintptr(code.Offset))
+			if p == 0 {
+				code = code.NextField
+			} else {
+				if (code.Flags & encoder.IndirectFlags) != 0 {
+					p = ptrToNPtr(p, code.PtrNum)
+				}
+				b = appendStructKey(ctx, code, b)
+				code = code.Next
+				store(ctxptr, code.Idx, p)
+			}
+		case encoder.OpStructPtrHeadMarshalJSON:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadMarshalJSON:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			p += uintptr(code.Offset)
+			if (code.Flags & encoder.IsNilableTypeFlags) != 0 {
+				if (code.Flags&encoder.IndirectFlags) != 0 || code.Op == encoder.OpStructPtrHeadMarshalJSON {
+					p = ptrToPtr(p)
+				}
+			}
+			if p == 0 && (code.Flags&encoder.NilCheckFlags) != 0 {
+				b = appendNull(ctx, b)
+			} else {
+				bb, err := appendMarshalJSON(ctx, code, b, ptrToInterface(code, p))
+				if err != nil {
+					return nil, err
+				}
+				b = bb
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyMarshalJSON:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyMarshalJSON:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			p += uintptr(code.Offset)
+			if (code.Flags & encoder.IsNilableTypeFlags) != 0 {
+				if (code.Flags&encoder.IndirectFlags) != 0 || code.Op == encoder.OpStructPtrHeadOmitEmptyMarshalJSON {
+					p = ptrToPtr(p)
+				}
+			}
+			iface := ptrToInterface(code, p)
+			if (code.Flags&encoder.NilCheckFlags) != 0 && encoder.IsNilForMarshaler(iface) {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				bb, err := appendMarshalJSON(ctx, code, b, iface)
+				if err != nil {
+					return nil, err
+				}
+				b = bb
+				b = appendComma(ctx, b)
+				code = code.Next
+			}
+		case encoder.OpStructPtrHeadMarshalJSONPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadMarshalJSONPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				bb, err := appendMarshalJSON(ctx, code, b, ptrToInterface(code, p))
+				if err != nil {
+					return nil, err
+				}
+				b = bb
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyMarshalJSONPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyMarshalJSONPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if p == 0 {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				bb, err := appendMarshalJSON(ctx, code, b, ptrToInterface(code, p))
+				if err != nil {
+					return nil, err
+				}
+				b = bb
+				b = appendComma(ctx, b)
+				code = code.Next
+			}
+		case encoder.OpStructPtrHeadMarshalText:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadMarshalText:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			p += uintptr(code.Offset)
+			if (code.Flags & encoder.IsNilableTypeFlags) != 0 {
+				if (code.Flags&encoder.IndirectFlags) != 0 || code.Op == encoder.OpStructPtrHeadMarshalText {
+					p = ptrToPtr(p)
+				}
+			}
+			if p == 0 && (code.Flags&encoder.NilCheckFlags) != 0 {
+				b = appendNull(ctx, b)
+			} else {
+				bb, err := appendMarshalText(ctx, code, b, ptrToInterface(code, p))
+				if err != nil {
+					return nil, err
+				}
+				b = bb
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyMarshalText:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyMarshalText:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			p += uintptr(code.Offset)
+			if (code.Flags & encoder.IsNilableTypeFlags) != 0 {
+				if (code.Flags&encoder.IndirectFlags) != 0 || code.Op == encoder.OpStructPtrHeadOmitEmptyMarshalText {
+					p = ptrToPtr(p)
+				}
+			}
+			if p == 0 && (code.Flags&encoder.NilCheckFlags) != 0 {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				bb, err := appendMarshalText(ctx, code, b, ptrToInterface(code, p))
+				if err != nil {
+					return nil, err
+				}
+				b = bb
+				b = appendComma(ctx, b)
+				code = code.Next
+			}
+		case encoder.OpStructPtrHeadMarshalTextPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadMarshalTextPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				bb, err := appendMarshalText(ctx, code, b, ptrToInterface(code, p))
+				if err != nil {
+					return nil, err
+				}
+				b = bb
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyMarshalTextPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyMarshalTextPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if p == 0 {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				bb, err := appendMarshalText(ctx, code, b, ptrToInterface(code, p))
+				if err != nil {
+					return nil, err
+				}
+				b = bb
+				b = appendComma(ctx, b)
+				code = code.Next
+			}
+		case encoder.OpStructField:
+			if code.Flags&encoder.IsTaggedKeyFlags != 0 || code.Flags&encoder.AnonymousKeyFlags == 0 {
+				b = appendStructKey(ctx, code, b)
+			}
+			p := load(ctxptr, code.Idx) + uintptr(code.Offset)
+			code = code.Next
+			store(ctxptr, code.Idx, p)
+		case encoder.OpStructFieldOmitEmpty:
+			p := load(ctxptr, code.Idx)
+			p += uintptr(code.Offset)
+			if ptrToPtr(p) == 0 && (code.Flags&encoder.IsNextOpPtrTypeFlags) != 0 {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				code = code.Next
+				store(ctxptr, code.Idx, p)
+			}
+		case encoder.OpStructFieldInt:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = appendInt(ctx, b, p+uintptr(code.Offset), code)
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyInt:
+			p := load(ctxptr, code.Idx)
+			u64 := ptrToUint64(p+uintptr(code.Offset), code.NumBitSize)
+			v := u64 & ((1 << code.NumBitSize) - 1)
+			if v != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendInt(ctx, b, p+uintptr(code.Offset), code)
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldIntString:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			b = appendInt(ctx, b, p+uintptr(code.Offset), code)
+			b = append(b, '"')
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyIntString:
+			p := load(ctxptr, code.Idx)
+			u64 := ptrToUint64(p+uintptr(code.Offset), code.NumBitSize)
+			v := u64 & ((1 << code.NumBitSize) - 1)
+			if v != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendInt(ctx, b, p+uintptr(code.Offset), code)
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldIntPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			b = appendStructKey(ctx, code, b)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendInt(ctx, b, p, code)
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyIntPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendInt(ctx, b, p, code)
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldIntPtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			b = appendStructKey(ctx, code, b)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = append(b, '"')
+				b = appendInt(ctx, b, p, code)
+				b = append(b, '"')
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyIntPtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendInt(ctx, b, p, code)
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldUint:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = appendUint(ctx, b, p+uintptr(code.Offset), code)
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyUint:
+			p := load(ctxptr, code.Idx)
+			u64 := ptrToUint64(p+uintptr(code.Offset), code.NumBitSize)
+			v := u64 & ((1 << code.NumBitSize) - 1)
+			if v != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendUint(ctx, b, p+uintptr(code.Offset), code)
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldUintString:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			b = appendUint(ctx, b, p+uintptr(code.Offset), code)
+			b = append(b, '"')
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyUintString:
+			p := load(ctxptr, code.Idx)
+			u64 := ptrToUint64(p+uintptr(code.Offset), code.NumBitSize)
+			v := u64 & ((1 << code.NumBitSize) - 1)
+			if v != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendUint(ctx, b, p+uintptr(code.Offset), code)
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldUintPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			b = appendStructKey(ctx, code, b)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendUint(ctx, b, p, code)
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyUintPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendUint(ctx, b, p, code)
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldUintPtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			b = appendStructKey(ctx, code, b)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = append(b, '"')
+				b = appendUint(ctx, b, p, code)
+				b = append(b, '"')
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyUintPtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendUint(ctx, b, p, code)
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldFloat32:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = appendFloat32(ctx, b, ptrToFloat32(p+uintptr(code.Offset)))
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyFloat32:
+			p := load(ctxptr, code.Idx)
+			v := ptrToFloat32(p + uintptr(code.Offset))
+			if v != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendFloat32(ctx, b, v)
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldFloat32String:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			b = appendFloat32(ctx, b, ptrToFloat32(p+uintptr(code.Offset)))
+			b = append(b, '"')
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyFloat32String:
+			p := load(ctxptr, code.Idx)
+			v := ptrToFloat32(p + uintptr(code.Offset))
+			if v != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendFloat32(ctx, b, v)
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldFloat32Ptr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			b = appendStructKey(ctx, code, b)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendFloat32(ctx, b, ptrToFloat32(p))
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyFloat32Ptr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendFloat32(ctx, b, ptrToFloat32(p))
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldFloat32PtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			b = appendStructKey(ctx, code, b)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = append(b, '"')
+				b = appendFloat32(ctx, b, ptrToFloat32(p))
+				b = append(b, '"')
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyFloat32PtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendFloat32(ctx, b, ptrToFloat32(p))
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldFloat64:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			v := ptrToFloat64(p + uintptr(code.Offset))
+			if math.IsInf(v, 0) || math.IsNaN(v) {
+				return nil, errUnsupportedFloat(v)
+			}
+			b = appendFloat64(ctx, b, v)
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyFloat64:
+			p := load(ctxptr, code.Idx)
+			v := ptrToFloat64(p + uintptr(code.Offset))
+			if v != 0 {
+				if math.IsInf(v, 0) || math.IsNaN(v) {
+					return nil, errUnsupportedFloat(v)
+				}
+				b = appendStructKey(ctx, code, b)
+				b = appendFloat64(ctx, b, v)
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldFloat64String:
+			p := load(ctxptr, code.Idx)
+			v := ptrToFloat64(p + uintptr(code.Offset))
+			if math.IsInf(v, 0) || math.IsNaN(v) {
+				return nil, errUnsupportedFloat(v)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			b = appendFloat64(ctx, b, v)
+			b = append(b, '"')
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyFloat64String:
+			p := load(ctxptr, code.Idx)
+			v := ptrToFloat64(p + uintptr(code.Offset))
+			if v != 0 {
+				if math.IsInf(v, 0) || math.IsNaN(v) {
+					return nil, errUnsupportedFloat(v)
+				}
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendFloat64(ctx, b, v)
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldFloat64Ptr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			b = appendStructKey(ctx, code, b)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.Next
+				break
+			}
+			v := ptrToFloat64(p)
+			if math.IsInf(v, 0) || math.IsNaN(v) {
+				return nil, errUnsupportedFloat(v)
+			}
+			b = appendFloat64(ctx, b, v)
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyFloat64Ptr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				v := ptrToFloat64(p)
+				if math.IsInf(v, 0) || math.IsNaN(v) {
+					return nil, errUnsupportedFloat(v)
+				}
+				b = appendFloat64(ctx, b, v)
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldFloat64PtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			b = appendStructKey(ctx, code, b)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				v := ptrToFloat64(p)
+				if math.IsInf(v, 0) || math.IsNaN(v) {
+					return nil, errUnsupportedFloat(v)
+				}
+				b = append(b, '"')
+				b = appendFloat64(ctx, b, v)
+				b = append(b, '"')
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyFloat64PtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				v := ptrToFloat64(p)
+				if math.IsInf(v, 0) || math.IsNaN(v) {
+					return nil, errUnsupportedFloat(v)
+				}
+				b = appendFloat64(ctx, b, v)
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldString:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = appendString(ctx, b, ptrToString(p+uintptr(code.Offset)))
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyString:
+			p := load(ctxptr, code.Idx)
+			v := ptrToString(p + uintptr(code.Offset))
+			if v != "" {
+				b = appendStructKey(ctx, code, b)
+				b = appendString(ctx, b, v)
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldStringString:
+			p := load(ctxptr, code.Idx)
+			s := ptrToString(p + uintptr(code.Offset))
+			b = appendStructKey(ctx, code, b)
+			b = appendString(ctx, b, string(appendString(ctx, []byte{}, s)))
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyStringString:
+			p := load(ctxptr, code.Idx)
+			v := ptrToString(p + uintptr(code.Offset))
+			if v != "" {
+				b = appendStructKey(ctx, code, b)
+				b = appendString(ctx, b, string(appendString(ctx, []byte{}, v)))
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldStringPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			b = appendStructKey(ctx, code, b)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendString(ctx, b, ptrToString(p))
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyStringPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendString(ctx, b, ptrToString(p))
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldStringPtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			b = appendStructKey(ctx, code, b)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendString(ctx, b, string(appendString(ctx, []byte{}, ptrToString(p))))
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyStringPtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendString(ctx, b, string(appendString(ctx, []byte{}, ptrToString(p))))
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldBool:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = appendBool(ctx, b, ptrToBool(p+uintptr(code.Offset)))
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyBool:
+			p := load(ctxptr, code.Idx)
+			v := ptrToBool(p + uintptr(code.Offset))
+			if v {
+				b = appendStructKey(ctx, code, b)
+				b = appendBool(ctx, b, v)
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldBoolString:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			b = appendBool(ctx, b, ptrToBool(p+uintptr(code.Offset)))
+			b = append(b, '"')
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyBoolString:
+			p := load(ctxptr, code.Idx)
+			v := ptrToBool(p + uintptr(code.Offset))
+			if v {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendBool(ctx, b, v)
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldBoolPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			b = appendStructKey(ctx, code, b)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendBool(ctx, b, ptrToBool(p))
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyBoolPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendBool(ctx, b, ptrToBool(p))
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldBoolPtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			b = appendStructKey(ctx, code, b)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = append(b, '"')
+				b = appendBool(ctx, b, ptrToBool(p))
+				b = append(b, '"')
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyBoolPtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendBool(ctx, b, ptrToBool(p))
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldBytes:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = appendByteSlice(ctx, b, ptrToBytes(p+uintptr(code.Offset)))
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyBytes:
+			p := load(ctxptr, code.Idx)
+			v := ptrToBytes(p + uintptr(code.Offset))
+			if len(v) > 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendByteSlice(ctx, b, v)
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldBytesPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			b = appendStructKey(ctx, code, b)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendByteSlice(ctx, b, ptrToBytes(p))
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyBytesPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendByteSlice(ctx, b, ptrToBytes(p))
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldNumber:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			bb, err := appendNumber(ctx, b, ptrToNumber(p+uintptr(code.Offset)))
+			if err != nil {
+				return nil, err
+			}
+			b = appendComma(ctx, bb)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyNumber:
+			p := load(ctxptr, code.Idx)
+			v := ptrToNumber(p + uintptr(code.Offset))
+			if v != "" {
+				b = appendStructKey(ctx, code, b)
+				bb, err := appendNumber(ctx, b, v)
+				if err != nil {
+					return nil, err
+				}
+				b = appendComma(ctx, bb)
+			}
+			code = code.Next
+		case encoder.OpStructFieldNumberString:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			bb, err := appendNumber(ctx, b, ptrToNumber(p+uintptr(code.Offset)))
+			if err != nil {
+				return nil, err
+			}
+			b = append(bb, '"')
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyNumberString:
+			p := load(ctxptr, code.Idx)
+			v := ptrToNumber(p + uintptr(code.Offset))
+			if v != "" {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				bb, err := appendNumber(ctx, b, v)
+				if err != nil {
+					return nil, err
+				}
+				b = append(bb, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldNumberPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			b = appendStructKey(ctx, code, b)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				bb, err := appendNumber(ctx, b, ptrToNumber(p))
+				if err != nil {
+					return nil, err
+				}
+				b = bb
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyNumberPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				bb, err := appendNumber(ctx, b, ptrToNumber(p))
+				if err != nil {
+					return nil, err
+				}
+				b = appendComma(ctx, bb)
+			}
+			code = code.Next
+		case encoder.OpStructFieldNumberPtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			b = appendStructKey(ctx, code, b)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = append(b, '"')
+				bb, err := appendNumber(ctx, b, ptrToNumber(p))
+				if err != nil {
+					return nil, err
+				}
+				b = append(bb, '"')
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyNumberPtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				bb, err := appendNumber(ctx, b, ptrToNumber(p))
+				if err != nil {
+					return nil, err
+				}
+				b = append(bb, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldMarshalJSON:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			p += uintptr(code.Offset)
+			if (code.Flags & encoder.IsNilableTypeFlags) != 0 {
+				p = ptrToPtr(p)
+			}
+			if p == 0 && (code.Flags&encoder.NilCheckFlags) != 0 {
+				b = appendNull(ctx, b)
+			} else {
+				bb, err := appendMarshalJSON(ctx, code, b, ptrToInterface(code, p))
+				if err != nil {
+					return nil, err
+				}
+				b = bb
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyMarshalJSON:
+			p := load(ctxptr, code.Idx)
+			p += uintptr(code.Offset)
+			if (code.Flags & encoder.IsNilableTypeFlags) != 0 {
+				p = ptrToPtr(p)
+			}
+			if p == 0 && (code.Flags&encoder.NilCheckFlags) != 0 {
+				code = code.NextField
+				break
+			}
+			iface := ptrToInterface(code, p)
+			if (code.Flags&encoder.NilCheckFlags) != 0 && encoder.IsNilForMarshaler(iface) {
+				code = code.NextField
+				break
+			}
+			b = appendStructKey(ctx, code, b)
+			bb, err := appendMarshalJSON(ctx, code, b, iface)
+			if err != nil {
+				return nil, err
+			}
+			b = appendComma(ctx, bb)
+			code = code.Next
+		case encoder.OpStructFieldMarshalJSONPtr:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				bb, err := appendMarshalJSON(ctx, code, b, ptrToInterface(code, p))
+				if err != nil {
+					return nil, err
+				}
+				b = bb
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyMarshalJSONPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				bb, err := appendMarshalJSON(ctx, code, b, ptrToInterface(code, p))
+				if err != nil {
+					return nil, err
+				}
+				b = appendComma(ctx, bb)
+			}
+			code = code.Next
+		case encoder.OpStructFieldMarshalText:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			p += uintptr(code.Offset)
+			if (code.Flags & encoder.IsNilableTypeFlags) != 0 {
+				p = ptrToPtr(p)
+			}
+			if p == 0 && (code.Flags&encoder.NilCheckFlags) != 0 {
+				b = appendNull(ctx, b)
+			} else {
+				bb, err := appendMarshalText(ctx, code, b, ptrToInterface(code, p))
+				if err != nil {
+					return nil, err
+				}
+				b = bb
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyMarshalText:
+			p := load(ctxptr, code.Idx)
+			p += uintptr(code.Offset)
+			if (code.Flags & encoder.IsNilableTypeFlags) != 0 {
+				p = ptrToPtr(p)
+			}
+			if p == 0 && (code.Flags&encoder.NilCheckFlags) != 0 {
+				code = code.NextField
+				break
+			}
+			b = appendStructKey(ctx, code, b)
+			bb, err := appendMarshalText(ctx, code, b, ptrToInterface(code, p))
+			if err != nil {
+				return nil, err
+			}
+			b = appendComma(ctx, bb)
+			code = code.Next
+		case encoder.OpStructFieldMarshalTextPtr:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				bb, err := appendMarshalText(ctx, code, b, ptrToInterface(code, p))
+				if err != nil {
+					return nil, err
+				}
+				b = bb
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyMarshalTextPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				bb, err := appendMarshalText(ctx, code, b, ptrToInterface(code, p))
+				if err != nil {
+					return nil, err
+				}
+				b = appendComma(ctx, bb)
+			}
+			code = code.Next
+		case encoder.OpStructFieldArray:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p += uintptr(code.Offset)
+			code = code.Next
+			store(ctxptr, code.Idx, p)
+		case encoder.OpStructFieldOmitEmptyArray:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p += uintptr(code.Offset)
+			code = code.Next
+			store(ctxptr, code.Idx, p)
+		case encoder.OpStructFieldArrayPtr:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			code = code.Next
+			store(ctxptr, code.Idx, p)
+		case encoder.OpStructFieldOmitEmptyArrayPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				code = code.Next
+				store(ctxptr, code.Idx, p)
+			} else {
+				code = code.NextField
+			}
+		case encoder.OpStructFieldSlice:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p += uintptr(code.Offset)
+			code = code.Next
+			store(ctxptr, code.Idx, p)
+		case encoder.OpStructFieldOmitEmptySlice:
+			p := load(ctxptr, code.Idx)
+			p += uintptr(code.Offset)
+			slice := ptrToSlice(p)
+			if slice.Len == 0 {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				code = code.Next
+				store(ctxptr, code.Idx, p)
+			}
+		case encoder.OpStructFieldSlicePtr:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			code = code.Next
+			store(ctxptr, code.Idx, p)
+		case encoder.OpStructFieldOmitEmptySlicePtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				code = code.Next
+				store(ctxptr, code.Idx, p)
+			} else {
+				code = code.NextField
+			}
+		case encoder.OpStructFieldMap:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToPtr(p + uintptr(code.Offset))
+			code = code.Next
+			store(ctxptr, code.Idx, p)
+		case encoder.OpStructFieldOmitEmptyMap:
+			p := load(ctxptr, code.Idx)
+			p = ptrToPtr(p + uintptr(code.Offset))
+			if p == 0 || maplen(ptrToUnsafePtr(p)) == 0 {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				code = code.Next
+				store(ctxptr, code.Idx, p)
+			}
+		case encoder.OpStructFieldMapPtr:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToPtr(p + uintptr(code.Offset))
+			if p != 0 {
+				p = ptrToNPtr(p, code.PtrNum)
+			}
+			code = code.Next
+			store(ctxptr, code.Idx, p)
+		case encoder.OpStructFieldOmitEmptyMapPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToPtr(p + uintptr(code.Offset))
+			if p != 0 {
+				p = ptrToNPtr(p, code.PtrNum)
+			}
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				code = code.Next
+				store(ctxptr, code.Idx, p)
+			} else {
+				code = code.NextField
+			}
+		case encoder.OpStructFieldStruct:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p += uintptr(code.Offset)
+			code = code.Next
+			store(ctxptr, code.Idx, p)
+		case encoder.OpStructFieldOmitEmptyStruct:
+			p := load(ctxptr, code.Idx)
+			p += uintptr(code.Offset)
+			if ptrToPtr(p) == 0 && (code.Flags&encoder.IsNextOpPtrTypeFlags) != 0 {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				code = code.Next
+				store(ctxptr, code.Idx, p)
+			}
+		case encoder.OpStructEnd:
+			b = appendStructEndSkipLast(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndInt:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = appendInt(ctx, b, p+uintptr(code.Offset), code)
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyInt:
+			p := load(ctxptr, code.Idx)
+			u64 := ptrToUint64(p+uintptr(code.Offset), code.NumBitSize)
+			v := u64 & ((1 << code.NumBitSize) - 1)
+			if v != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendInt(ctx, b, p+uintptr(code.Offset), code)
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndIntString:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			b = appendInt(ctx, b, p+uintptr(code.Offset), code)
+			b = append(b, '"')
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyIntString:
+			p := load(ctxptr, code.Idx)
+			u64 := ptrToUint64(p+uintptr(code.Offset), code.NumBitSize)
+			v := u64 & ((1 << code.NumBitSize) - 1)
+			if v != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendInt(ctx, b, p+uintptr(code.Offset), code)
+				b = append(b, '"')
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndIntPtr:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendInt(ctx, b, p, code)
+			}
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyIntPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendInt(ctx, b, p, code)
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndIntPtrString:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = append(b, '"')
+				b = appendInt(ctx, b, p, code)
+				b = append(b, '"')
+			}
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyIntPtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendInt(ctx, b, p, code)
+				b = append(b, '"')
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndUint:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = appendUint(ctx, b, p+uintptr(code.Offset), code)
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyUint:
+			p := load(ctxptr, code.Idx)
+			u64 := ptrToUint64(p+uintptr(code.Offset), code.NumBitSize)
+			v := u64 & ((1 << code.NumBitSize) - 1)
+			if v != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendUint(ctx, b, p+uintptr(code.Offset), code)
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndUintString:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			b = appendUint(ctx, b, p+uintptr(code.Offset), code)
+			b = append(b, '"')
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyUintString:
+			p := load(ctxptr, code.Idx)
+			u64 := ptrToUint64(p+uintptr(code.Offset), code.NumBitSize)
+			v := u64 & ((1 << code.NumBitSize) - 1)
+			if v != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendUint(ctx, b, p+uintptr(code.Offset), code)
+				b = append(b, '"')
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndUintPtr:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendUint(ctx, b, p, code)
+			}
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyUintPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendUint(ctx, b, p, code)
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndUintPtrString:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = append(b, '"')
+				b = appendUint(ctx, b, p, code)
+				b = append(b, '"')
+			}
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyUintPtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendUint(ctx, b, p, code)
+				b = append(b, '"')
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndFloat32:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = appendFloat32(ctx, b, ptrToFloat32(p+uintptr(code.Offset)))
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyFloat32:
+			p := load(ctxptr, code.Idx)
+			v := ptrToFloat32(p + uintptr(code.Offset))
+			if v != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendFloat32(ctx, b, v)
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndFloat32String:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			b = appendFloat32(ctx, b, ptrToFloat32(p+uintptr(code.Offset)))
+			b = append(b, '"')
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyFloat32String:
+			p := load(ctxptr, code.Idx)
+			v := ptrToFloat32(p + uintptr(code.Offset))
+			if v != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendFloat32(ctx, b, v)
+				b = append(b, '"')
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndFloat32Ptr:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendFloat32(ctx, b, ptrToFloat32(p))
+			}
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyFloat32Ptr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendFloat32(ctx, b, ptrToFloat32(p))
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndFloat32PtrString:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = append(b, '"')
+				b = appendFloat32(ctx, b, ptrToFloat32(p))
+				b = append(b, '"')
+			}
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyFloat32PtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendFloat32(ctx, b, ptrToFloat32(p))
+				b = append(b, '"')
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndFloat64:
+			p := load(ctxptr, code.Idx)
+			v := ptrToFloat64(p + uintptr(code.Offset))
+			if math.IsInf(v, 0) || math.IsNaN(v) {
+				return nil, errUnsupportedFloat(v)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = appendFloat64(ctx, b, v)
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyFloat64:
+			p := load(ctxptr, code.Idx)
+			v := ptrToFloat64(p + uintptr(code.Offset))
+			if v != 0 {
+				if math.IsInf(v, 0) || math.IsNaN(v) {
+					return nil, errUnsupportedFloat(v)
+				}
+				b = appendStructKey(ctx, code, b)
+				b = appendFloat64(ctx, b, v)
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndFloat64String:
+			p := load(ctxptr, code.Idx)
+			v := ptrToFloat64(p + uintptr(code.Offset))
+			if math.IsInf(v, 0) || math.IsNaN(v) {
+				return nil, errUnsupportedFloat(v)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			b = appendFloat64(ctx, b, v)
+			b = append(b, '"')
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyFloat64String:
+			p := load(ctxptr, code.Idx)
+			v := ptrToFloat64(p + uintptr(code.Offset))
+			if v != 0 {
+				if math.IsInf(v, 0) || math.IsNaN(v) {
+					return nil, errUnsupportedFloat(v)
+				}
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendFloat64(ctx, b, v)
+				b = append(b, '"')
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndFloat64Ptr:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+				b = appendStructEnd(ctx, code, b)
+				code = code.Next
+				break
+			}
+			v := ptrToFloat64(p)
+			if math.IsInf(v, 0) || math.IsNaN(v) {
+				return nil, errUnsupportedFloat(v)
+			}
+			b = appendFloat64(ctx, b, v)
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyFloat64Ptr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				v := ptrToFloat64(p)
+				if math.IsInf(v, 0) || math.IsNaN(v) {
+					return nil, errUnsupportedFloat(v)
+				}
+				b = appendFloat64(ctx, b, v)
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndFloat64PtrString:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = append(b, '"')
+				v := ptrToFloat64(p)
+				if math.IsInf(v, 0) || math.IsNaN(v) {
+					return nil, errUnsupportedFloat(v)
+				}
+				b = appendFloat64(ctx, b, v)
+				b = append(b, '"')
+			}
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyFloat64PtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				v := ptrToFloat64(p)
+				if math.IsInf(v, 0) || math.IsNaN(v) {
+					return nil, errUnsupportedFloat(v)
+				}
+				b = append(b, '"')
+				b = appendFloat64(ctx, b, v)
+				b = append(b, '"')
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndString:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = appendString(ctx, b, ptrToString(p+uintptr(code.Offset)))
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyString:
+			p := load(ctxptr, code.Idx)
+			v := ptrToString(p + uintptr(code.Offset))
+			if v != "" {
+				b = appendStructKey(ctx, code, b)
+				b = appendString(ctx, b, v)
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndStringString:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			s := ptrToString(p + uintptr(code.Offset))
+			b = appendString(ctx, b, string(appendString(ctx, []byte{}, s)))
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyStringString:
+			p := load(ctxptr, code.Idx)
+			v := ptrToString(p + uintptr(code.Offset))
+			if v != "" {
+				b = appendStructKey(ctx, code, b)
+				b = appendString(ctx, b, string(appendString(ctx, []byte{}, v)))
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndStringPtr:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendString(ctx, b, ptrToString(p))
+			}
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyStringPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendString(ctx, b, ptrToString(p))
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndStringPtrString:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendString(ctx, b, string(appendString(ctx, []byte{}, ptrToString(p))))
+			}
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyStringPtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendString(ctx, b, string(appendString(ctx, []byte{}, ptrToString(p))))
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndBool:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = appendBool(ctx, b, ptrToBool(p+uintptr(code.Offset)))
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyBool:
+			p := load(ctxptr, code.Idx)
+			v := ptrToBool(p + uintptr(code.Offset))
+			if v {
+				b = appendStructKey(ctx, code, b)
+				b = appendBool(ctx, b, v)
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndBoolString:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			b = appendBool(ctx, b, ptrToBool(p+uintptr(code.Offset)))
+			b = append(b, '"')
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyBoolString:
+			p := load(ctxptr, code.Idx)
+			v := ptrToBool(p + uintptr(code.Offset))
+			if v {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendBool(ctx, b, v)
+				b = append(b, '"')
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndBoolPtr:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendBool(ctx, b, ptrToBool(p))
+			}
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyBoolPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendBool(ctx, b, ptrToBool(p))
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndBoolPtrString:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = append(b, '"')
+				b = appendBool(ctx, b, ptrToBool(p))
+				b = append(b, '"')
+			}
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyBoolPtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendBool(ctx, b, ptrToBool(p))
+				b = append(b, '"')
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndBytes:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = appendByteSlice(ctx, b, ptrToBytes(p+uintptr(code.Offset)))
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyBytes:
+			p := load(ctxptr, code.Idx)
+			v := ptrToBytes(p + uintptr(code.Offset))
+			if len(v) > 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendByteSlice(ctx, b, v)
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndBytesPtr:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendByteSlice(ctx, b, ptrToBytes(p))
+			}
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyBytesPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendByteSlice(ctx, b, ptrToBytes(p))
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndNumber:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			bb, err := appendNumber(ctx, b, ptrToNumber(p+uintptr(code.Offset)))
+			if err != nil {
+				return nil, err
+			}
+			b = appendStructEnd(ctx, code, bb)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyNumber:
+			p := load(ctxptr, code.Idx)
+			v := ptrToNumber(p + uintptr(code.Offset))
+			if v != "" {
+				b = appendStructKey(ctx, code, b)
+				bb, err := appendNumber(ctx, b, v)
+				if err != nil {
+					return nil, err
+				}
+				b = appendStructEnd(ctx, code, bb)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndNumberString:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			bb, err := appendNumber(ctx, b, ptrToNumber(p+uintptr(code.Offset)))
+			if err != nil {
+				return nil, err
+			}
+			b = append(bb, '"')
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyNumberString:
+			p := load(ctxptr, code.Idx)
+			v := ptrToNumber(p + uintptr(code.Offset))
+			if v != "" {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				bb, err := appendNumber(ctx, b, v)
+				if err != nil {
+					return nil, err
+				}
+				b = append(bb, '"')
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndNumberPtr:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				bb, err := appendNumber(ctx, b, ptrToNumber(p))
+				if err != nil {
+					return nil, err
+				}
+				b = bb
+			}
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyNumberPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				bb, err := appendNumber(ctx, b, ptrToNumber(p))
+				if err != nil {
+					return nil, err
+				}
+				b = appendStructEnd(ctx, code, bb)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndNumberPtrString:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = append(b, '"')
+				bb, err := appendNumber(ctx, b, ptrToNumber(p))
+				if err != nil {
+					return nil, err
+				}
+				b = append(bb, '"')
+			}
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyNumberPtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				bb, err := appendNumber(ctx, b, ptrToNumber(p))
+				if err != nil {
+					return nil, err
+				}
+				b = append(bb, '"')
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpEnd:
+			goto END
+		}
+	}
+END:
+	return b, nil
+}
diff --git a/vendor/github.com/goccy/go-json/internal/encoder/vm_color_indent/debug_vm.go b/vendor/github.com/goccy/go-json/internal/encoder/vm_color_indent/debug_vm.go
new file mode 100644
index 00000000..dd4cd489
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/internal/encoder/vm_color_indent/debug_vm.go
@@ -0,0 +1,35 @@
+package vm_color_indent
+
+import (
+	"fmt"
+
+	"github.com/goccy/go-json/internal/encoder"
+)
+
+func DebugRun(ctx *encoder.RuntimeContext, b []byte, codeSet *encoder.OpcodeSet) ([]byte, error) {
+	var code *encoder.Opcode
+	if (ctx.Option.Flag & encoder.HTMLEscapeOption) != 0 {
+		code = codeSet.EscapeKeyCode
+	} else {
+		code = codeSet.NoescapeKeyCode
+	}
+
+	defer func() {
+		if err := recover(); err != nil {
+			w := ctx.Option.DebugOut
+			fmt.Fprintln(w, "=============[DEBUG]===============")
+			fmt.Fprintln(w, "* [TYPE]")
+			fmt.Fprintln(w, codeSet.Type)
+			fmt.Fprintf(w, "\n")
+			fmt.Fprintln(w, "* [ALL OPCODE]")
+			fmt.Fprintln(w, code.Dump())
+			fmt.Fprintf(w, "\n")
+			fmt.Fprintln(w, "* [CONTEXT]")
+			fmt.Fprintf(w, "%+v\n", ctx)
+			fmt.Fprintln(w, "===================================")
+			panic(err)
+		}
+	}()
+
+	return Run(ctx, b, codeSet)
+}
diff --git a/vendor/github.com/goccy/go-json/internal/encoder/vm_color_indent/util.go b/vendor/github.com/goccy/go-json/internal/encoder/vm_color_indent/util.go
new file mode 100644
index 00000000..2395abec
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/internal/encoder/vm_color_indent/util.go
@@ -0,0 +1,297 @@
+package vm_color_indent
+
+import (
+	"encoding/json"
+	"fmt"
+	"unsafe"
+
+	"github.com/goccy/go-json/internal/encoder"
+	"github.com/goccy/go-json/internal/runtime"
+)
+
+const uintptrSize = 4 << (^uintptr(0) >> 63) // 8 when uintptr is 64 bits wide (shift by 1), otherwise 4 (shift by 0)
+
+var ( // package-local short names bound to identifiers exported by the encoder package
+	appendIndent        = encoder.AppendIndent
+	appendStructEnd     = encoder.AppendStructEndIndent
+	errUnsupportedValue = encoder.ErrUnsupportedValue
+	errUnsupportedFloat = encoder.ErrUnsupportedFloat
+	mapiterinit         = encoder.MapIterInit
+	mapiterkey          = encoder.MapIterKey
+	mapitervalue        = encoder.MapIterValue
+	mapiternext         = encoder.MapIterNext
+	maplen              = encoder.MapLen
+)
+
+type emptyInterface struct { // two-word overlay that ptrToInterface reinterprets as an interface{} value
+	typ *runtime.Type  // type word
+	ptr unsafe.Pointer // data word
+}
+
+type nonEmptyInterface struct { // overlay Run casts to when code.Flags has encoder.NonEmptyInterfaceFlags set
+	itab *struct {
+		ityp *runtime.Type // static interface type
+		typ  *runtime.Type // dynamic concrete type
+		// unused fields...
+	}
+	ptr unsafe.Pointer // data pointer of the interface value
+}
+
+func errUnimplementedOp(op encoder.OpType) error { // builds the error naming an opcode that has no implementation
+	return fmt.Errorf("encoder (indent): opcode %s has not been implemented", op)
+}
+
+func load(base uintptr, idx uint32) uintptr { // returns the uintptr-sized word stored at address base+idx
+	addr := base + uintptr(idx)
+	return **(**uintptr)(unsafe.Pointer(&addr)) // view the address held in addr as a *uintptr, then dereference it
+}
+
+func store(base uintptr, idx uint32, p uintptr) { // writes p into the uintptr-sized word at address base+idx
+	addr := base + uintptr(idx)
+	**(**uintptr)(unsafe.Pointer(&addr)) = p // view the address held in addr as a *uintptr and assign through it
+}
+
+func loadNPtr(base uintptr, idx uint32, ptrNum uint8) uintptr { // loads the word at base+idx, then dereferences it up to ptrNum times
+	addr := base + uintptr(idx)
+	p := **(**uintptr)(unsafe.Pointer(&addr))
+	for i := uint8(0); i < ptrNum; i++ {
+		if p == 0 {
+			return 0 // a nil link ends the walk early with 0
+		}
+		p = ptrToPtr(p)
+	}
+	return p
+}
+
+func ptrToUint64(p uintptr, bitSize uint8) uint64 { // reads the unsigned integer of width bitSize at address p, widened to uint64
+	switch bitSize {
+	case 8:
+		return (uint64)(**(**uint8)(unsafe.Pointer(&p)))
+	case 16:
+		return (uint64)(**(**uint16)(unsafe.Pointer(&p)))
+	case 32:
+		return (uint64)(**(**uint32)(unsafe.Pointer(&p)))
+	case 64:
+		return **(**uint64)(unsafe.Pointer(&p))
+	}
+	return 0 // any other bitSize returns 0 without touching memory
+}
+
+func ptrToFloat32(p uintptr) float32            { return **(**float32)(unsafe.Pointer(&p)) }            // reads the float32 stored at address p
+func ptrToFloat64(p uintptr) float64            { return **(**float64)(unsafe.Pointer(&p)) }            // reads the float64 stored at address p
+func ptrToBool(p uintptr) bool                  { return **(**bool)(unsafe.Pointer(&p)) }               // reads the bool stored at address p
+func ptrToBytes(p uintptr) []byte               { return **(**[]byte)(unsafe.Pointer(&p)) }             // reads the []byte value stored at address p
+func ptrToNumber(p uintptr) json.Number         { return **(**json.Number)(unsafe.Pointer(&p)) }        // reads the json.Number stored at address p
+func ptrToString(p uintptr) string              { return **(**string)(unsafe.Pointer(&p)) }             // reads the string value stored at address p
+func ptrToSlice(p uintptr) *runtime.SliceHeader { return *(**runtime.SliceHeader)(unsafe.Pointer(&p)) } // returns address p itself typed as *runtime.SliceHeader; nothing is read from p
+func ptrToPtr(p uintptr) uintptr { // dereferences once: returns the pointer value stored at address p
+	return uintptr(**(**unsafe.Pointer)(unsafe.Pointer(&p)))
+}
+func ptrToNPtr(p uintptr, ptrNum uint8) uintptr { // dereferences p up to ptrNum times via ptrToPtr
+	for i := uint8(0); i < ptrNum; i++ {
+		if p == 0 {
+			return 0 // a nil link ends the walk early with 0
+		}
+		p = ptrToPtr(p)
+	}
+	return p
+}
+
+func ptrToUnsafePtr(p uintptr) unsafe.Pointer { // returns the address p as an unsafe.Pointer with the same bits
+	return *(*unsafe.Pointer)(unsafe.Pointer(&p))
+}
+func ptrToInterface(code *encoder.Opcode, p uintptr) interface{} { // builds an interface{} whose type word is code.Type and whose data word is p
+	return *(*interface{})(unsafe.Pointer(&emptyInterface{ // reinterpret the two-word struct as an interface{} value
+		typ: code.Type,
+		ptr: *(*unsafe.Pointer)(unsafe.Pointer(&p)),
+	}))
+}
+
+// appendInt appends what encoder.AppendInt writes for p and code, wrapped in the Int color header and footer.
+func appendInt(ctx *encoder.RuntimeContext, b []byte, p uintptr, code *encoder.Opcode) []byte {
+	scheme := ctx.Option.ColorScheme.Int
+	out := encoder.AppendInt(ctx, append(b, scheme.Header...), p, code)
+	return append(out, scheme.Footer...)
+}
+
+// appendUint appends what encoder.AppendUint writes for p and code, wrapped in the Uint color header and footer.
+func appendUint(ctx *encoder.RuntimeContext, b []byte, p uintptr, code *encoder.Opcode) []byte {
+	scheme := ctx.Option.ColorScheme.Uint
+	out := encoder.AppendUint(ctx, append(b, scheme.Header...), p, code)
+	return append(out, scheme.Footer...)
+}
+
+// appendFloat32 appends v via encoder.AppendFloat32, wrapped in the Float color header and footer.
+func appendFloat32(ctx *encoder.RuntimeContext, b []byte, v float32) []byte {
+	scheme := ctx.Option.ColorScheme.Float
+	out := encoder.AppendFloat32(ctx, append(b, scheme.Header...), v)
+	return append(out, scheme.Footer...)
+}
+
+// appendFloat64 appends v via encoder.AppendFloat64, wrapped in the Float color header and footer.
+func appendFloat64(ctx *encoder.RuntimeContext, b []byte, v float64) []byte {
+	scheme := ctx.Option.ColorScheme.Float
+	out := encoder.AppendFloat64(ctx, append(b, scheme.Header...), v)
+	return append(out, scheme.Footer...)
+}
+
+// appendString appends v via encoder.AppendString, wrapped in the String color header and footer.
+func appendString(ctx *encoder.RuntimeContext, b []byte, v string) []byte {
+	scheme := ctx.Option.ColorScheme.String
+	out := encoder.AppendString(ctx, append(b, scheme.Header...), v)
+	return append(out, scheme.Footer...)
+}
+
+// appendByteSlice appends src via encoder.AppendByteSlice, wrapped in the Binary color header and footer.
+func appendByteSlice(ctx *encoder.RuntimeContext, b []byte, src []byte) []byte {
+	scheme := ctx.Option.ColorScheme.Binary
+	out := encoder.AppendByteSlice(ctx, append(b, scheme.Header...), src)
+	return append(out, scheme.Footer...)
+}
+
+// appendNumber appends n via encoder.AppendNumber inside the Int color header and footer; on error it returns nil and that error.
+func appendNumber(ctx *encoder.RuntimeContext, b []byte, n json.Number) ([]byte, error) {
+	scheme := ctx.Option.ColorScheme.Int
+	out, err := encoder.AppendNumber(ctx, append(b, scheme.Header...), n)
+	if err != nil {
+		return nil, err
+	}
+	return append(out, scheme.Footer...), nil
+}
+
+// appendBool appends the literal true or false, wrapped in the Bool color header and footer.
+func appendBool(ctx *encoder.RuntimeContext, b []byte, v bool) []byte {
+	scheme := ctx.Option.ColorScheme.Bool
+	literal := "false"
+	if v {
+		literal = "true"
+	}
+	b = append(append(b, scheme.Header...), literal...)
+	return append(b, scheme.Footer...)
+}
+
+// appendNull appends the literal null, wrapped in the Null color header and footer.
+func appendNull(ctx *encoder.RuntimeContext, b []byte) []byte {
+	scheme := ctx.Option.ColorScheme.Null
+	out := append(append(b, scheme.Header...), "null"...)
+	return append(out, scheme.Footer...)
+}
+
+func appendComma(_ *encoder.RuntimeContext, b []byte) []byte { // appends the two-byte separator: a comma and a newline
+	return append(b, ",\n"...)
+}
+
+// appendNullComma appends the literal null in the Null color header and footer, then a comma and a newline.
+func appendNullComma(ctx *encoder.RuntimeContext, b []byte) []byte {
+	scheme := ctx.Option.ColorScheme.Null
+	out := append(append(b, scheme.Header...), "null"...)
+	return append(append(out, scheme.Footer...), ",\n"...)
+}
+
+func appendColon(_ *encoder.RuntimeContext, b []byte) []byte { // replaces the final two bytes of b with a colon and a space
+	return append(b[:len(b)-2], ": "...)
+}
+
+// appendMapKeyValue appends indentation for code.Indent+1, then key with its last two bytes rewritten to ": ", then value.
+func appendMapKeyValue(ctx *encoder.RuntimeContext, code *encoder.Opcode, b, key, value []byte) []byte {
+	b = append(appendIndent(ctx, b, code.Indent+1), key...)
+	end := len(b)
+	b[end-2], b[end-1] = ':', ' '
+	return append(b, value...)
+}
+
+// appendMapEnd drops the last two bytes of b, then appends a newline, indentation for code.Indent and "},\n".
+func appendMapEnd(ctx *encoder.RuntimeContext, code *encoder.Opcode, b []byte) []byte {
+	trimmed := append(b[:len(b)-2], '\n')
+	closed := appendIndent(ctx, trimmed, code.Indent)
+	return append(closed, "},\n"...)
+}
+
+// appendArrayHead appends "[" and a newline, followed by indentation for code.Indent+1.
+func appendArrayHead(ctx *encoder.RuntimeContext, code *encoder.Opcode, b []byte) []byte {
+	return appendIndent(ctx, append(b, "[\n"...), code.Indent+1)
+}
+
+// appendArrayEnd drops the last two bytes of b, then appends a newline, indentation for code.Indent and "],\n".
+func appendArrayEnd(ctx *encoder.RuntimeContext, code *encoder.Opcode, b []byte) []byte {
+	trimmed := append(b[:len(b)-2], '\n')
+	closed := appendIndent(ctx, trimmed, code.Indent)
+	return append(closed, "],\n"...)
+}
+
+func appendEmptyArray(_ *encoder.RuntimeContext, b []byte) []byte { // appends an empty array followed by a comma and a newline
+	return append(b, "[],\n"...)
+}
+
+func appendEmptyObject(_ *encoder.RuntimeContext, b []byte) []byte { // appends an empty object followed by a comma and a newline
+	return append(b, "{},\n"...)
+}
+
+// appendObjectEnd rewrites the second-to-last byte of b to a newline, drops the last byte, and appends indentation plus "},\n".
+func appendObjectEnd(ctx *encoder.RuntimeContext, code *encoder.Opcode, b []byte) []byte {
+	end := len(b) - 1
+	b[end-1] = '\n' // replace the comma with a newline; the byte at end is cut off by the reslice below
+	closed := appendIndent(ctx, b[:end], code.Indent)
+	return append(closed, "},\n"...)
+}
+
+func appendMarshalJSON(ctx *encoder.RuntimeContext, code *encoder.Opcode, b []byte, v interface{}) ([]byte, error) { // forwards unchanged to encoder.AppendMarshalJSONIndent; no color header or footer is added here
+	return encoder.AppendMarshalJSONIndent(ctx, code, b, v)
+}
+
+// appendMarshalText appends the output of encoder.AppendMarshalTextIndent inside the String color header and footer.
+func appendMarshalText(ctx *encoder.RuntimeContext, code *encoder.Opcode, b []byte, v interface{}) ([]byte, error) {
+	scheme := ctx.Option.ColorScheme.String
+	out, err := encoder.AppendMarshalTextIndent(ctx, code, append(b, scheme.Header...), v)
+	if err != nil {
+		return nil, err
+	}
+	return append(out, scheme.Footer...), nil
+}
+
+func appendStructHead(_ *encoder.RuntimeContext, b []byte) []byte { // appends an opening brace and a newline
+	return append(b, "{\n"...)
+}
+
+// appendStructKey appends indentation for code.Indent, then code.Key without its final byte
+// wrapped in the ObjectKey color header and footer, and finally a colon and a space.
+// The separator is written after the footer, so it falls outside the colored span.
+func appendStructKey(ctx *encoder.RuntimeContext, code *encoder.Opcode, b []byte) []byte {
+	b = appendIndent(ctx, b, code.Indent)
+	scheme := ctx.Option.ColorScheme.ObjectKey
+	b = append(b, scheme.Header...)
+	b = append(b, code.Key[:len(code.Key)-1]...)
+	return append(append(b, scheme.Footer...), ": "...)
+}
+
+// appendStructEndSkipLast closes a struct without writing a final field. If the second-to-last byte is '{' the last byte
+// becomes '}'; otherwise a trailing ",\n" is cut, and a newline, indentation for code.Indent-1 and '}' are appended.
+func appendStructEndSkipLast(ctx *encoder.RuntimeContext, code *encoder.Opcode, b []byte) []byte {
+	end := len(b) - 1
+	if b[end-1] == '{' {
+		b[end] = '}'
+		return appendComma(ctx, b)
+	}
+	if b[end] == '\n' {
+		b = b[:end-1] // to remove ',' and '\n' characters
+	}
+	b = append(b, '\n')
+	b = appendIndent(ctx, b, code.Indent-1)
+	return appendComma(ctx, append(b, '}'))
+}
+
+func restoreIndent(ctx *encoder.RuntimeContext, code *encoder.Opcode, ctxptr uintptr) { // sets ctx.BaseIndent from the word at ctxptr+code.Length, the slot storeIndent writes
+	ctx.BaseIndent = uint32(load(ctxptr, code.Length))
+}
+
+func storeIndent(ctxptr uintptr, code *encoder.Opcode, indent uintptr) { // saves indent in the word at ctxptr+code.Length, the slot restoreIndent reads
+	store(ctxptr, code.Length, indent)
+}
+
+func appendArrayElemIndent(ctx *encoder.RuntimeContext, code *encoder.Opcode, b []byte) []byte { // calls appendIndent with code.Indent+1, i.e. one more than the opcode's own indent
+	return appendIndent(ctx, b, code.Indent+1)
+}
+
+func appendMapKeyIndent(ctx *encoder.RuntimeContext, code *encoder.Opcode, b []byte) []byte { // calls appendIndent with the opcode's own code.Indent
+	return appendIndent(ctx, b, code.Indent)
+}
diff --git a/vendor/github.com/goccy/go-json/internal/encoder/vm_color_indent/vm.go b/vendor/github.com/goccy/go-json/internal/encoder/vm_color_indent/vm.go
new file mode 100644
index 00000000..3b4e22e5
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/internal/encoder/vm_color_indent/vm.go
@@ -0,0 +1,4859 @@
+// Code generated by internal/cmd/generator. DO NOT EDIT!
+package vm_color_indent
+
+import (
+	"math"
+	"reflect"
+	"sort"
+	"unsafe"
+
+	"github.com/goccy/go-json/internal/encoder"
+	"github.com/goccy/go-json/internal/runtime"
+)
+
+func Run(ctx *encoder.RuntimeContext, b []byte, codeSet *encoder.OpcodeSet) ([]byte, error) {
+	recursiveLevel := 0
+	ptrOffset := uintptr(0)
+	ctxptr := ctx.Ptr()
+	var code *encoder.Opcode
+	if (ctx.Option.Flag & encoder.HTMLEscapeOption) != 0 {
+		code = codeSet.EscapeKeyCode
+	} else {
+		code = codeSet.NoescapeKeyCode
+	}
+
+	for {
+		switch code.Op {
+		default:
+			return nil, errUnimplementedOp(code.Op)
+		case encoder.OpPtr:
+			p := load(ctxptr, code.Idx)
+			code = code.Next
+			store(ctxptr, code.Idx, ptrToPtr(p))
+		case encoder.OpIntPtr:
+			p := loadNPtr(ctxptr, code.Idx, code.PtrNum)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.Next
+				break
+			}
+			store(ctxptr, code.Idx, p)
+			fallthrough
+		case encoder.OpInt:
+			b = appendInt(ctx, b, load(ctxptr, code.Idx), code)
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpUintPtr:
+			p := loadNPtr(ctxptr, code.Idx, code.PtrNum)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.Next
+				break
+			}
+			store(ctxptr, code.Idx, p)
+			fallthrough
+		case encoder.OpUint:
+			b = appendUint(ctx, b, load(ctxptr, code.Idx), code)
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpIntString:
+			b = append(b, '"')
+			b = appendInt(ctx, b, load(ctxptr, code.Idx), code)
+			b = append(b, '"')
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpUintString:
+			b = append(b, '"')
+			b = appendUint(ctx, b, load(ctxptr, code.Idx), code)
+			b = append(b, '"')
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpFloat32Ptr:
+			p := loadNPtr(ctxptr, code.Idx, code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+				b = appendComma(ctx, b)
+				code = code.Next
+				break
+			}
+			store(ctxptr, code.Idx, p)
+			fallthrough
+		case encoder.OpFloat32:
+			b = appendFloat32(ctx, b, ptrToFloat32(load(ctxptr, code.Idx)))
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpFloat64Ptr:
+			p := loadNPtr(ctxptr, code.Idx, code.PtrNum)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.Next
+				break
+			}
+			store(ctxptr, code.Idx, p)
+			fallthrough
+		case encoder.OpFloat64:
+			v := ptrToFloat64(load(ctxptr, code.Idx))
+			if math.IsInf(v, 0) || math.IsNaN(v) {
+				return nil, errUnsupportedFloat(v)
+			}
+			b = appendFloat64(ctx, b, v)
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStringPtr:
+			p := loadNPtr(ctxptr, code.Idx, code.PtrNum)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.Next
+				break
+			}
+			store(ctxptr, code.Idx, p)
+			fallthrough
+		case encoder.OpString:
+			b = appendString(ctx, b, ptrToString(load(ctxptr, code.Idx)))
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpBoolPtr:
+			p := loadNPtr(ctxptr, code.Idx, code.PtrNum)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.Next
+				break
+			}
+			store(ctxptr, code.Idx, p)
+			fallthrough
+		case encoder.OpBool:
+			b = appendBool(ctx, b, ptrToBool(load(ctxptr, code.Idx)))
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpBytesPtr:
+			p := loadNPtr(ctxptr, code.Idx, code.PtrNum)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.Next
+				break
+			}
+			store(ctxptr, code.Idx, p)
+			fallthrough
+		case encoder.OpBytes:
+			b = appendByteSlice(ctx, b, ptrToBytes(load(ctxptr, code.Idx)))
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpNumberPtr:
+			p := loadNPtr(ctxptr, code.Idx, code.PtrNum)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.Next
+				break
+			}
+			store(ctxptr, code.Idx, p)
+			fallthrough
+		case encoder.OpNumber:
+			bb, err := appendNumber(ctx, b, ptrToNumber(load(ctxptr, code.Idx)))
+			if err != nil {
+				return nil, err
+			}
+			b = appendComma(ctx, bb)
+			code = code.Next
+		case encoder.OpInterfacePtr:
+			p := loadNPtr(ctxptr, code.Idx, code.PtrNum)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.Next
+				break
+			}
+			store(ctxptr, code.Idx, p)
+			fallthrough
+		case encoder.OpInterface:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.Next
+				break
+			}
+			if recursiveLevel > encoder.StartDetectingCyclesAfter {
+				for _, seen := range ctx.SeenPtr {
+					if p == seen {
+						return nil, errUnsupportedValue(code, p)
+					}
+				}
+			}
+			ctx.SeenPtr = append(ctx.SeenPtr, p)
+			var (
+				typ      *runtime.Type
+				ifacePtr unsafe.Pointer
+			)
+			up := ptrToUnsafePtr(p)
+			if code.Flags&encoder.NonEmptyInterfaceFlags != 0 {
+				iface := (*nonEmptyInterface)(up)
+				ifacePtr = iface.ptr
+				if iface.itab != nil {
+					typ = iface.itab.typ
+				}
+			} else {
+				iface := (*emptyInterface)(up)
+				ifacePtr = iface.ptr
+				typ = iface.typ
+			}
+			if ifacePtr == nil {
+				isDirectedNil := typ != nil && typ.Kind() == reflect.Struct && !runtime.IfaceIndir(typ)
+				if !isDirectedNil {
+					b = appendNullComma(ctx, b)
+					code = code.Next
+					break
+				}
+			}
+			ctx.KeepRefs = append(ctx.KeepRefs, up)
+			ifaceCodeSet, err := encoder.CompileToGetCodeSet(ctx, uintptr(unsafe.Pointer(typ)))
+			if err != nil {
+				return nil, err
+			}
+
+			totalLength := uintptr(code.Length) + 3
+			nextTotalLength := uintptr(ifaceCodeSet.CodeLength) + 3
+
+			var c *encoder.Opcode
+			if (ctx.Option.Flag & encoder.HTMLEscapeOption) != 0 {
+				c = ifaceCodeSet.InterfaceEscapeKeyCode
+			} else {
+				c = ifaceCodeSet.InterfaceNoescapeKeyCode
+			}
+			curlen := uintptr(len(ctx.Ptrs))
+			offsetNum := ptrOffset / uintptrSize
+			oldOffset := ptrOffset
+			ptrOffset += totalLength * uintptrSize
+			oldBaseIndent := ctx.BaseIndent
+			ctx.BaseIndent += code.Indent
+
+			newLen := offsetNum + totalLength + nextTotalLength
+			if curlen < newLen {
+				ctx.Ptrs = append(ctx.Ptrs, make([]uintptr, newLen-curlen)...)
+			}
+			ctxptr = ctx.Ptr() + ptrOffset // assign new ctxptr
+
+			end := ifaceCodeSet.EndCode
+			store(ctxptr, c.Idx, uintptr(ifacePtr))
+			store(ctxptr, end.Idx, oldOffset)
+			store(ctxptr, end.ElemIdx, uintptr(unsafe.Pointer(code.Next)))
+			storeIndent(ctxptr, end, uintptr(oldBaseIndent))
+			code = c
+			recursiveLevel++
+		case encoder.OpInterfaceEnd:
+			recursiveLevel--
+
+			// restore ctxptr
+			offset := load(ctxptr, code.Idx)
+			restoreIndent(ctx, code, ctxptr)
+			ctx.SeenPtr = ctx.SeenPtr[:len(ctx.SeenPtr)-1]
+
+			codePtr := load(ctxptr, code.ElemIdx)
+			code = (*encoder.Opcode)(ptrToUnsafePtr(codePtr))
+			ctxptr = ctx.Ptr() + offset
+			ptrOffset = offset
+		case encoder.OpMarshalJSONPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToPtr(p))
+			fallthrough
+		case encoder.OpMarshalJSON:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.Next
+				break
+			}
+			if (code.Flags&encoder.IsNilableTypeFlags) != 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				p = ptrToPtr(p)
+			}
+			bb, err := appendMarshalJSON(ctx, code, b, ptrToInterface(code, p))
+			if err != nil {
+				return nil, err
+			}
+			b = appendComma(ctx, bb)
+			code = code.Next
+		case encoder.OpMarshalTextPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToPtr(p))
+			fallthrough
+		case encoder.OpMarshalText:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				b = append(b, `""`...)
+				b = appendComma(ctx, b)
+				code = code.Next
+				break
+			}
+			if (code.Flags&encoder.IsNilableTypeFlags) != 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				p = ptrToPtr(p)
+			}
+			bb, err := appendMarshalText(ctx, code, b, ptrToInterface(code, p))
+			if err != nil {
+				return nil, err
+			}
+			b = appendComma(ctx, bb)
+			code = code.Next
+		case encoder.OpSlicePtr:
+			p := loadNPtr(ctxptr, code.Idx, code.PtrNum)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, p)
+			fallthrough
+		case encoder.OpSlice:
+			p := load(ctxptr, code.Idx)
+			slice := ptrToSlice(p)
+			if p == 0 || slice.Data == nil {
+				b = appendNullComma(ctx, b)
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.ElemIdx, 0)
+			store(ctxptr, code.Length, uintptr(slice.Len))
+			store(ctxptr, code.Idx, uintptr(slice.Data))
+			if slice.Len > 0 {
+				b = appendArrayHead(ctx, code, b)
+				code = code.Next
+				store(ctxptr, code.Idx, uintptr(slice.Data))
+			} else {
+				b = appendEmptyArray(ctx, b)
+				code = code.End.Next
+			}
+		case encoder.OpSliceElem:
+			idx := load(ctxptr, code.ElemIdx)
+			length := load(ctxptr, code.Length)
+			idx++
+			if idx < length {
+				b = appendArrayElemIndent(ctx, code, b)
+				store(ctxptr, code.ElemIdx, idx)
+				data := load(ctxptr, code.Idx)
+				size := uintptr(code.Size)
+				code = code.Next
+				store(ctxptr, code.Idx, data+idx*size)
+			} else {
+				b = appendArrayEnd(ctx, code, b)
+				code = code.End.Next
+			}
+		case encoder.OpArrayPtr:
+			p := loadNPtr(ctxptr, code.Idx, code.PtrNum)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, p)
+			fallthrough
+		case encoder.OpArray:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.End.Next
+				break
+			}
+			if code.Length > 0 {
+				b = appendArrayHead(ctx, code, b)
+				store(ctxptr, code.ElemIdx, 0)
+				code = code.Next
+				store(ctxptr, code.Idx, p)
+			} else {
+				b = appendEmptyArray(ctx, b)
+				code = code.End.Next
+			}
+		case encoder.OpArrayElem:
+			idx := load(ctxptr, code.ElemIdx)
+			idx++
+			if idx < uintptr(code.Length) {
+				b = appendArrayElemIndent(ctx, code, b)
+				store(ctxptr, code.ElemIdx, idx)
+				p := load(ctxptr, code.Idx)
+				size := uintptr(code.Size)
+				code = code.Next
+				store(ctxptr, code.Idx, p+idx*size)
+			} else {
+				b = appendArrayEnd(ctx, code, b)
+				code = code.End.Next
+			}
+		case encoder.OpMapPtr:
+			p := loadNPtr(ctxptr, code.Idx, code.PtrNum)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, p)
+			fallthrough
+		case encoder.OpMap:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.End.Next
+				break
+			}
+			uptr := ptrToUnsafePtr(p)
+			mlen := maplen(uptr)
+			if mlen <= 0 {
+				b = appendEmptyObject(ctx, b)
+				code = code.End.Next
+				break
+			}
+			b = appendStructHead(ctx, b)
+			unorderedMap := (ctx.Option.Flag & encoder.UnorderedMapOption) != 0
+			mapCtx := encoder.NewMapContext(mlen, unorderedMap)
+			mapiterinit(code.Type, uptr, &mapCtx.Iter)
+			store(ctxptr, code.Idx, uintptr(unsafe.Pointer(mapCtx)))
+			ctx.KeepRefs = append(ctx.KeepRefs, unsafe.Pointer(mapCtx))
+			if unorderedMap {
+				b = appendMapKeyIndent(ctx, code.Next, b)
+			} else {
+				mapCtx.Start = len(b)
+				mapCtx.First = len(b)
+			}
+			key := mapiterkey(&mapCtx.Iter)
+			store(ctxptr, code.Next.Idx, uintptr(key))
+			code = code.Next
+		case encoder.OpMapKey:
+			mapCtx := (*encoder.MapContext)(ptrToUnsafePtr(load(ctxptr, code.Idx)))
+			idx := mapCtx.Idx
+			idx++
+			if (ctx.Option.Flag & encoder.UnorderedMapOption) != 0 {
+				if idx < mapCtx.Len {
+					b = appendMapKeyIndent(ctx, code, b)
+					mapCtx.Idx = int(idx)
+					key := mapiterkey(&mapCtx.Iter)
+					store(ctxptr, code.Next.Idx, uintptr(key))
+					code = code.Next
+				} else {
+					b = appendObjectEnd(ctx, code, b)
+					encoder.ReleaseMapContext(mapCtx)
+					code = code.End.Next
+				}
+			} else {
+				mapCtx.Slice.Items[mapCtx.Idx].Value = b[mapCtx.Start:len(b)]
+				if idx < mapCtx.Len {
+					mapCtx.Idx = int(idx)
+					mapCtx.Start = len(b)
+					key := mapiterkey(&mapCtx.Iter)
+					store(ctxptr, code.Next.Idx, uintptr(key))
+					code = code.Next
+				} else {
+					code = code.End
+				}
+			}
+		case encoder.OpMapValue:
+			mapCtx := (*encoder.MapContext)(ptrToUnsafePtr(load(ctxptr, code.Idx)))
+			if (ctx.Option.Flag & encoder.UnorderedMapOption) != 0 {
+				b = appendColon(ctx, b)
+			} else {
+				mapCtx.Slice.Items[mapCtx.Idx].Key = b[mapCtx.Start:len(b)]
+				mapCtx.Start = len(b)
+			}
+			value := mapitervalue(&mapCtx.Iter)
+			store(ctxptr, code.Next.Idx, uintptr(value))
+			mapiternext(&mapCtx.Iter)
+			code = code.Next
+		case encoder.OpMapEnd:
+			// this operation only used by sorted map.
+			mapCtx := (*encoder.MapContext)(ptrToUnsafePtr(load(ctxptr, code.Idx)))
+			sort.Sort(mapCtx.Slice)
+			buf := mapCtx.Buf
+			for _, item := range mapCtx.Slice.Items {
+				buf = appendMapKeyValue(ctx, code, buf, item.Key, item.Value)
+			}
+			buf = appendMapEnd(ctx, code, buf)
+			b = b[:mapCtx.First]
+			b = append(b, buf...)
+			mapCtx.Buf = buf
+			encoder.ReleaseMapContext(mapCtx)
+			code = code.Next
+		case encoder.OpRecursivePtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				code = code.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpRecursive:
+			ptr := load(ctxptr, code.Idx)
+			if ptr != 0 {
+				if recursiveLevel > encoder.StartDetectingCyclesAfter {
+					for _, seen := range ctx.SeenPtr {
+						if ptr == seen {
+							return nil, errUnsupportedValue(code, ptr)
+						}
+					}
+				}
+			}
+			ctx.SeenPtr = append(ctx.SeenPtr, ptr)
+			c := code.Jmp.Code
+			curlen := uintptr(len(ctx.Ptrs))
+			offsetNum := ptrOffset / uintptrSize
+			oldOffset := ptrOffset
+			ptrOffset += code.Jmp.CurLen * uintptrSize
+			oldBaseIndent := ctx.BaseIndent
+			indentDiffFromTop := c.Indent - 1
+			ctx.BaseIndent += code.Indent - indentDiffFromTop
+
+			newLen := offsetNum + code.Jmp.CurLen + code.Jmp.NextLen
+			if curlen < newLen {
+				ctx.Ptrs = append(ctx.Ptrs, make([]uintptr, newLen-curlen)...)
+			}
+			ctxptr = ctx.Ptr() + ptrOffset // assign new ctxptr
+
+			store(ctxptr, c.Idx, ptr)
+			store(ctxptr, c.End.Next.Idx, oldOffset)
+			store(ctxptr, c.End.Next.ElemIdx, uintptr(unsafe.Pointer(code.Next)))
+			storeIndent(ctxptr, c.End.Next, uintptr(oldBaseIndent))
+			code = c
+			recursiveLevel++
+		case encoder.OpRecursiveEnd:
+			recursiveLevel--
+
+			// restore ctxptr
+			restoreIndent(ctx, code, ctxptr)
+			offset := load(ctxptr, code.Idx)
+			ctx.SeenPtr = ctx.SeenPtr[:len(ctx.SeenPtr)-1]
+
+			codePtr := load(ctxptr, code.ElemIdx)
+			code = (*encoder.Opcode)(ptrToUnsafePtr(codePtr))
+			ctxptr = ctx.Ptr() + offset
+			ptrOffset = offset
+		case encoder.OpStructPtrHead:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHead:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && ((code.Flags&encoder.IndirectFlags) != 0 || code.Next.Op == encoder.OpStructEnd) {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if len(code.Key) > 0 {
+				if (code.Flags&encoder.IsTaggedKeyFlags) != 0 || code.Flags&encoder.AnonymousKeyFlags == 0 {
+					b = appendStructKey(ctx, code, b)
+				}
+			}
+			p += uintptr(code.Offset)
+			code = code.Next
+			store(ctxptr, code.Idx, p)
+		case encoder.OpStructPtrHeadOmitEmpty:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmpty:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && ((code.Flags&encoder.IndirectFlags) != 0 || code.Next.Op == encoder.OpStructEnd) {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			p += uintptr(code.Offset)
+			if p == 0 || (ptrToPtr(p) == 0 && (code.Flags&encoder.IsNextOpPtrTypeFlags) != 0) {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				code = code.Next
+				store(ctxptr, code.Idx, p)
+			}
+		case encoder.OpStructPtrHeadInt:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadInt:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = appendInt(ctx, b, p+uintptr(code.Offset), code)
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyInt:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyInt:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			u64 := ptrToUint64(p+uintptr(code.Offset), code.NumBitSize)
+			v := u64 & ((1 << code.NumBitSize) - 1)
+			if v == 0 {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				b = appendInt(ctx, b, p+uintptr(code.Offset), code)
+				b = appendComma(ctx, b)
+				code = code.Next
+			}
+		case encoder.OpStructPtrHeadIntString:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadIntString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			b = appendInt(ctx, b, p+uintptr(code.Offset), code)
+			b = append(b, '"')
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyIntString:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyIntString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			p += uintptr(code.Offset)
+			u64 := ptrToUint64(p, code.NumBitSize)
+			v := u64 & ((1 << code.NumBitSize) - 1)
+			if v == 0 {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendInt(ctx, b, p, code)
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+				code = code.Next
+			}
+		case encoder.OpStructPtrHeadIntPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadIntPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendInt(ctx, b, p, code)
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyIntPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyIntPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendInt(ctx, b, p, code)
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructPtrHeadIntPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadIntPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = append(b, '"')
+				b = appendInt(ctx, b, p, code)
+				b = append(b, '"')
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyIntPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyIntPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendInt(ctx, b, p, code)
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructPtrHeadUint:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadUint:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = appendUint(ctx, b, p+uintptr(code.Offset), code)
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyUint:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyUint:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			u64 := ptrToUint64(p+uintptr(code.Offset), code.NumBitSize)
+			v := u64 & ((1 << code.NumBitSize) - 1)
+			if v == 0 {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				b = appendUint(ctx, b, p+uintptr(code.Offset), code)
+				b = appendComma(ctx, b)
+				code = code.Next
+			}
+		case encoder.OpStructPtrHeadUintString:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadUintString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			b = appendUint(ctx, b, p+uintptr(code.Offset), code)
+			b = append(b, '"')
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyUintString:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyUintString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			u64 := ptrToUint64(p+uintptr(code.Offset), code.NumBitSize)
+			v := u64 & ((1 << code.NumBitSize) - 1)
+			if v == 0 {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendUint(ctx, b, p+uintptr(code.Offset), code)
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+				code = code.Next
+			}
+		case encoder.OpStructPtrHeadUintPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadUintPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendUint(ctx, b, p, code)
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyUintPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyUintPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendUint(ctx, b, p, code)
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructPtrHeadUintPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadUintPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = append(b, '"')
+				b = appendUint(ctx, b, p, code)
+				b = append(b, '"')
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyUintPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyUintPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendUint(ctx, b, p, code)
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructPtrHeadFloat32:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadFloat32:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = appendFloat32(ctx, b, ptrToFloat32(p+uintptr(code.Offset)))
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyFloat32:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyFloat32:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			v := ptrToFloat32(p + uintptr(code.Offset))
+			if v == 0 {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				b = appendFloat32(ctx, b, v)
+				b = appendComma(ctx, b)
+				code = code.Next
+			}
+		case encoder.OpStructPtrHeadFloat32String:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadFloat32String:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			b = appendFloat32(ctx, b, ptrToFloat32(p+uintptr(code.Offset)))
+			b = append(b, '"')
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyFloat32String:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyFloat32String:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			v := ptrToFloat32(p + uintptr(code.Offset))
+			if v == 0 {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendFloat32(ctx, b, v)
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+				code = code.Next
+			}
+		case encoder.OpStructPtrHeadFloat32Ptr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadFloat32Ptr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendFloat32(ctx, b, ptrToFloat32(p))
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyFloat32Ptr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyFloat32Ptr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendFloat32(ctx, b, ptrToFloat32(p))
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructPtrHeadFloat32PtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadFloat32PtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = append(b, '"')
+				b = appendFloat32(ctx, b, ptrToFloat32(p))
+				b = append(b, '"')
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyFloat32PtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyFloat32PtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendFloat32(ctx, b, ptrToFloat32(p))
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructPtrHeadFloat64:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadFloat64:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			v := ptrToFloat64(p + uintptr(code.Offset))
+			if math.IsInf(v, 0) || math.IsNaN(v) {
+				return nil, errUnsupportedFloat(v)
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = appendFloat64(ctx, b, v)
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyFloat64:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyFloat64:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			v := ptrToFloat64(p + uintptr(code.Offset))
+			if v == 0 {
+				code = code.NextField
+			} else {
+				if math.IsInf(v, 0) || math.IsNaN(v) {
+					return nil, errUnsupportedFloat(v)
+				}
+				b = appendStructKey(ctx, code, b)
+				b = appendFloat64(ctx, b, v)
+				b = appendComma(ctx, b)
+				code = code.Next
+			}
+		case encoder.OpStructPtrHeadFloat64String:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadFloat64String:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			v := ptrToFloat64(p + uintptr(code.Offset))
+			if math.IsInf(v, 0) || math.IsNaN(v) {
+				return nil, errUnsupportedFloat(v)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			b = appendFloat64(ctx, b, v)
+			b = append(b, '"')
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyFloat64String:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyFloat64String:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			v := ptrToFloat64(p + uintptr(code.Offset))
+			if v == 0 {
+				code = code.NextField
+			} else {
+				if math.IsInf(v, 0) || math.IsNaN(v) {
+					return nil, errUnsupportedFloat(v)
+				}
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendFloat64(ctx, b, v)
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+				code = code.Next
+			}
+		case encoder.OpStructPtrHeadFloat64Ptr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadFloat64Ptr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				v := ptrToFloat64(p)
+				if math.IsInf(v, 0) || math.IsNaN(v) {
+					return nil, errUnsupportedFloat(v)
+				}
+				b = appendFloat64(ctx, b, v)
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyFloat64Ptr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyFloat64Ptr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				v := ptrToFloat64(p)
+				if math.IsInf(v, 0) || math.IsNaN(v) {
+					return nil, errUnsupportedFloat(v)
+				}
+				b = appendFloat64(ctx, b, v)
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructPtrHeadFloat64PtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadFloat64PtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = append(b, '"')
+				v := ptrToFloat64(p)
+				if math.IsInf(v, 0) || math.IsNaN(v) {
+					return nil, errUnsupportedFloat(v)
+				}
+				b = appendFloat64(ctx, b, v)
+				b = append(b, '"')
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyFloat64PtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyFloat64PtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				v := ptrToFloat64(p)
+				if math.IsInf(v, 0) || math.IsNaN(v) {
+					return nil, errUnsupportedFloat(v)
+				}
+				b = appendFloat64(ctx, b, v)
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructPtrHeadString:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNull(ctx, b)
+					b = appendComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = appendString(ctx, b, ptrToString(p+uintptr(code.Offset)))
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyString:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			v := ptrToString(p + uintptr(code.Offset))
+			if v == "" {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				b = appendString(ctx, b, v)
+				b = appendComma(ctx, b)
+				code = code.Next
+			}
+		case encoder.OpStructPtrHeadStringString:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadStringString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = appendString(ctx, b, string(appendString(ctx, []byte{}, ptrToString(p+uintptr(code.Offset)))))
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyStringString:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyStringString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			v := ptrToString(p + uintptr(code.Offset))
+			if v == "" {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				b = appendString(ctx, b, string(appendString(ctx, []byte{}, v)))
+				b = appendComma(ctx, b)
+				code = code.Next
+			}
+		case encoder.OpStructPtrHeadStringPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadStringPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendString(ctx, b, ptrToString(p))
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyStringPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyStringPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendString(ctx, b, ptrToString(p))
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructPtrHeadStringPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadStringPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendString(ctx, b, string(appendString(ctx, []byte{}, ptrToString(p))))
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyStringPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyStringPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendString(ctx, b, string(appendString(ctx, []byte{}, ptrToString(p))))
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructPtrHeadBool:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadBool:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = appendBool(ctx, b, ptrToBool(p+uintptr(code.Offset)))
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyBool:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyBool:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			v := ptrToBool(p + uintptr(code.Offset))
+			if v {
+				b = appendStructKey(ctx, code, b)
+				b = appendBool(ctx, b, v)
+				b = appendComma(ctx, b)
+				code = code.Next
+			} else {
+				code = code.NextField
+			}
+		case encoder.OpStructPtrHeadBoolString:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadBoolString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			b = appendBool(ctx, b, ptrToBool(p+uintptr(code.Offset)))
+			b = append(b, '"')
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyBoolString:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyBoolString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			v := ptrToBool(p + uintptr(code.Offset))
+			if v {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendBool(ctx, b, v)
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+				code = code.Next
+			} else {
+				code = code.NextField
+			}
+		case encoder.OpStructPtrHeadBoolPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadBoolPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendBool(ctx, b, ptrToBool(p))
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyBoolPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyBoolPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendBool(ctx, b, ptrToBool(p))
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructPtrHeadBoolPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadBoolPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = append(b, '"')
+				b = appendBool(ctx, b, ptrToBool(p))
+				b = append(b, '"')
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyBoolPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyBoolPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendBool(ctx, b, ptrToBool(p))
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructPtrHeadBytes:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadBytes:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = appendByteSlice(ctx, b, ptrToBytes(p+uintptr(code.Offset)))
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyBytes:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyBytes:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			v := ptrToBytes(p + uintptr(code.Offset))
+			if len(v) == 0 {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				b = appendByteSlice(ctx, b, v)
+				b = appendComma(ctx, b)
+				code = code.Next
+			}
+		case encoder.OpStructPtrHeadBytesPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadBytesPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendByteSlice(ctx, b, ptrToBytes(p))
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyBytesPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyBytesPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendByteSlice(ctx, b, ptrToBytes(p))
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructPtrHeadNumber:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadNumber:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			bb, err := appendNumber(ctx, b, ptrToNumber(p+uintptr(code.Offset)))
+			if err != nil {
+				return nil, err
+			}
+			b = appendComma(ctx, bb)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyNumber:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyNumber:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			v := ptrToNumber(p + uintptr(code.Offset))
+			if v == "" {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				bb, err := appendNumber(ctx, b, v)
+				if err != nil {
+					return nil, err
+				}
+				b = appendComma(ctx, bb)
+				code = code.Next
+			}
+		case encoder.OpStructPtrHeadNumberString:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadNumberString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			bb, err := appendNumber(ctx, b, ptrToNumber(p+uintptr(code.Offset)))
+			if err != nil {
+				return nil, err
+			}
+			b = append(bb, '"')
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyNumberString:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyNumberString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			v := ptrToNumber(p + uintptr(code.Offset))
+			if v == "" {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				bb, err := appendNumber(ctx, b, v)
+				if err != nil {
+					return nil, err
+				}
+				b = append(bb, '"')
+				b = appendComma(ctx, b)
+				code = code.Next
+			}
+		case encoder.OpStructPtrHeadNumberPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadNumberPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				bb, err := appendNumber(ctx, b, ptrToNumber(p))
+				if err != nil {
+					return nil, err
+				}
+				b = bb
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyNumberPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyNumberPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				bb, err := appendNumber(ctx, b, ptrToNumber(p))
+				if err != nil {
+					return nil, err
+				}
+				b = appendComma(ctx, bb)
+			}
+			code = code.Next
+		case encoder.OpStructPtrHeadNumberPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadNumberPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = append(b, '"')
+				bb, err := appendNumber(ctx, b, ptrToNumber(p))
+				if err != nil {
+					return nil, err
+				}
+				b = append(bb, '"')
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyNumberPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyNumberPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				bb, err := appendNumber(ctx, b, ptrToNumber(p))
+				if err != nil {
+					return nil, err
+				}
+				b = append(bb, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructPtrHeadArray, encoder.OpStructPtrHeadSlice:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadArray, encoder.OpStructHeadSlice:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			p += uintptr(code.Offset)
+			code = code.Next
+			store(ctxptr, code.Idx, p)
+		case encoder.OpStructPtrHeadOmitEmptyArray:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyArray:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			p += uintptr(code.Offset)
+			b = appendStructKey(ctx, code, b)
+			code = code.Next
+			store(ctxptr, code.Idx, p)
+		case encoder.OpStructPtrHeadOmitEmptySlice:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptySlice:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			p += uintptr(code.Offset)
+			slice := ptrToSlice(p)
+			if slice.Len == 0 {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				code = code.Next
+				store(ctxptr, code.Idx, p)
+			}
+		case encoder.OpStructPtrHeadArrayPtr, encoder.OpStructPtrHeadSlicePtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadArrayPtr, encoder.OpStructHeadSlicePtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.NextField
+			} else {
+				code = code.Next
+				store(ctxptr, code.Idx, p)
+			}
+		case encoder.OpStructPtrHeadOmitEmptyArrayPtr, encoder.OpStructPtrHeadOmitEmptySlicePtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyArrayPtr, encoder.OpStructHeadOmitEmptySlicePtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				code = code.Next
+				store(ctxptr, code.Idx, p)
+			}
+		case encoder.OpStructPtrHeadMap:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadMap:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if p != 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				p = ptrToPtr(p + uintptr(code.Offset))
+			}
+			code = code.Next
+			store(ctxptr, code.Idx, p)
+		case encoder.OpStructPtrHeadOmitEmptyMap:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyMap:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if p != 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				p = ptrToPtr(p + uintptr(code.Offset))
+			}
+			if maplen(ptrToUnsafePtr(p)) == 0 {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				code = code.Next
+				store(ctxptr, code.Idx, p)
+			}
+		case encoder.OpStructPtrHeadMapPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadMapPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.NextField
+				break
+			}
+			p = ptrToPtr(p + uintptr(code.Offset))
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.NextField
+			} else {
+				if (code.Flags & encoder.IndirectFlags) != 0 {
+					p = ptrToNPtr(p, code.PtrNum)
+				}
+				code = code.Next
+				store(ctxptr, code.Idx, p)
+			}
+		case encoder.OpStructPtrHeadOmitEmptyMapPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyMapPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if p == 0 {
+				code = code.NextField
+				break
+			}
+			p = ptrToPtr(p + uintptr(code.Offset))
+			if p == 0 {
+				code = code.NextField
+			} else {
+				if (code.Flags & encoder.IndirectFlags) != 0 {
+					p = ptrToNPtr(p, code.PtrNum)
+				}
+				b = appendStructKey(ctx, code, b)
+				code = code.Next
+				store(ctxptr, code.Idx, p)
+			}
+		case encoder.OpStructPtrHeadMarshalJSON:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadMarshalJSON:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			p += uintptr(code.Offset)
+			if (code.Flags & encoder.IsNilableTypeFlags) != 0 {
+				if (code.Flags&encoder.IndirectFlags) != 0 || code.Op == encoder.OpStructPtrHeadMarshalJSON {
+					p = ptrToPtr(p)
+				}
+			}
+			if p == 0 && (code.Flags&encoder.NilCheckFlags) != 0 {
+				b = appendNull(ctx, b)
+			} else {
+				bb, err := appendMarshalJSON(ctx, code, b, ptrToInterface(code, p))
+				if err != nil {
+					return nil, err
+				}
+				b = bb
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyMarshalJSON:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyMarshalJSON:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			p += uintptr(code.Offset)
+			if (code.Flags & encoder.IsNilableTypeFlags) != 0 {
+				if (code.Flags&encoder.IndirectFlags) != 0 || code.Op == encoder.OpStructPtrHeadOmitEmptyMarshalJSON {
+					p = ptrToPtr(p)
+				}
+			}
+			iface := ptrToInterface(code, p)
+			if (code.Flags&encoder.NilCheckFlags) != 0 && encoder.IsNilForMarshaler(iface) {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				bb, err := appendMarshalJSON(ctx, code, b, iface)
+				if err != nil {
+					return nil, err
+				}
+				b = bb
+				b = appendComma(ctx, b)
+				code = code.Next
+			}
+		case encoder.OpStructPtrHeadMarshalJSONPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadMarshalJSONPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				bb, err := appendMarshalJSON(ctx, code, b, ptrToInterface(code, p))
+				if err != nil {
+					return nil, err
+				}
+				b = bb
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyMarshalJSONPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyMarshalJSONPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if p == 0 {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				bb, err := appendMarshalJSON(ctx, code, b, ptrToInterface(code, p))
+				if err != nil {
+					return nil, err
+				}
+				b = bb
+				b = appendComma(ctx, b)
+				code = code.Next
+			}
+		case encoder.OpStructPtrHeadMarshalText:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadMarshalText:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			p += uintptr(code.Offset)
+			if (code.Flags & encoder.IsNilableTypeFlags) != 0 {
+				if (code.Flags&encoder.IndirectFlags) != 0 || code.Op == encoder.OpStructPtrHeadMarshalText {
+					p = ptrToPtr(p)
+				}
+			}
+			if p == 0 && (code.Flags&encoder.NilCheckFlags) != 0 {
+				b = appendNull(ctx, b)
+			} else {
+				bb, err := appendMarshalText(ctx, code, b, ptrToInterface(code, p))
+				if err != nil {
+					return nil, err
+				}
+				b = bb
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyMarshalText:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyMarshalText:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			p += uintptr(code.Offset)
+			if (code.Flags & encoder.IsNilableTypeFlags) != 0 {
+				if (code.Flags&encoder.IndirectFlags) != 0 || code.Op == encoder.OpStructPtrHeadOmitEmptyMarshalText {
+					p = ptrToPtr(p)
+				}
+			}
+			if p == 0 && (code.Flags&encoder.NilCheckFlags) != 0 {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				bb, err := appendMarshalText(ctx, code, b, ptrToInterface(code, p))
+				if err != nil {
+					return nil, err
+				}
+				b = bb
+				b = appendComma(ctx, b)
+				code = code.Next
+			}
+		case encoder.OpStructPtrHeadMarshalTextPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadMarshalTextPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				bb, err := appendMarshalText(ctx, code, b, ptrToInterface(code, p))
+				if err != nil {
+					return nil, err
+				}
+				b = bb
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyMarshalTextPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyMarshalTextPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if p == 0 {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				bb, err := appendMarshalText(ctx, code, b, ptrToInterface(code, p))
+				if err != nil {
+					return nil, err
+				}
+				b = bb
+				b = appendComma(ctx, b)
+				code = code.Next
+			}
+		case encoder.OpStructField:
+			if code.Flags&encoder.IsTaggedKeyFlags != 0 || code.Flags&encoder.AnonymousKeyFlags == 0 {
+				b = appendStructKey(ctx, code, b)
+			}
+			p := load(ctxptr, code.Idx) + uintptr(code.Offset)
+			code = code.Next
+			store(ctxptr, code.Idx, p)
+		case encoder.OpStructFieldOmitEmpty:
+			p := load(ctxptr, code.Idx)
+			p += uintptr(code.Offset)
+			if ptrToPtr(p) == 0 && (code.Flags&encoder.IsNextOpPtrTypeFlags) != 0 {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				code = code.Next
+				store(ctxptr, code.Idx, p)
+			}
+		case encoder.OpStructFieldInt:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = appendInt(ctx, b, p+uintptr(code.Offset), code)
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyInt:
+			p := load(ctxptr, code.Idx)
+			u64 := ptrToUint64(p+uintptr(code.Offset), code.NumBitSize)
+			v := u64 & ((1 << code.NumBitSize) - 1)
+			if v != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendInt(ctx, b, p+uintptr(code.Offset), code)
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldIntString:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			b = appendInt(ctx, b, p+uintptr(code.Offset), code)
+			b = append(b, '"')
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyIntString:
+			p := load(ctxptr, code.Idx)
+			u64 := ptrToUint64(p+uintptr(code.Offset), code.NumBitSize)
+			v := u64 & ((1 << code.NumBitSize) - 1)
+			if v != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendInt(ctx, b, p+uintptr(code.Offset), code)
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldIntPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			b = appendStructKey(ctx, code, b)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendInt(ctx, b, p, code)
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyIntPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendInt(ctx, b, p, code)
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldIntPtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			b = appendStructKey(ctx, code, b)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = append(b, '"')
+				b = appendInt(ctx, b, p, code)
+				b = append(b, '"')
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyIntPtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendInt(ctx, b, p, code)
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldUint:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = appendUint(ctx, b, p+uintptr(code.Offset), code)
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyUint:
+			p := load(ctxptr, code.Idx)
+			u64 := ptrToUint64(p+uintptr(code.Offset), code.NumBitSize)
+			v := u64 & ((1 << code.NumBitSize) - 1)
+			if v != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendUint(ctx, b, p+uintptr(code.Offset), code)
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldUintString:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			b = appendUint(ctx, b, p+uintptr(code.Offset), code)
+			b = append(b, '"')
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyUintString:
+			p := load(ctxptr, code.Idx)
+			u64 := ptrToUint64(p+uintptr(code.Offset), code.NumBitSize)
+			v := u64 & ((1 << code.NumBitSize) - 1)
+			if v != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendUint(ctx, b, p+uintptr(code.Offset), code)
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldUintPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			b = appendStructKey(ctx, code, b)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendUint(ctx, b, p, code)
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyUintPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendUint(ctx, b, p, code)
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldUintPtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			b = appendStructKey(ctx, code, b)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = append(b, '"')
+				b = appendUint(ctx, b, p, code)
+				b = append(b, '"')
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyUintPtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendUint(ctx, b, p, code)
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldFloat32:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = appendFloat32(ctx, b, ptrToFloat32(p+uintptr(code.Offset)))
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyFloat32:
+			p := load(ctxptr, code.Idx)
+			v := ptrToFloat32(p + uintptr(code.Offset))
+			if v != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendFloat32(ctx, b, v)
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldFloat32String:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			b = appendFloat32(ctx, b, ptrToFloat32(p+uintptr(code.Offset)))
+			b = append(b, '"')
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyFloat32String:
+			p := load(ctxptr, code.Idx)
+			v := ptrToFloat32(p + uintptr(code.Offset))
+			if v != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendFloat32(ctx, b, v)
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldFloat32Ptr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			b = appendStructKey(ctx, code, b)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendFloat32(ctx, b, ptrToFloat32(p))
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyFloat32Ptr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendFloat32(ctx, b, ptrToFloat32(p))
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldFloat32PtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			b = appendStructKey(ctx, code, b)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = append(b, '"')
+				b = appendFloat32(ctx, b, ptrToFloat32(p))
+				b = append(b, '"')
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyFloat32PtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendFloat32(ctx, b, ptrToFloat32(p))
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldFloat64:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			v := ptrToFloat64(p + uintptr(code.Offset))
+			if math.IsInf(v, 0) || math.IsNaN(v) {
+				return nil, errUnsupportedFloat(v)
+			}
+			b = appendFloat64(ctx, b, v)
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyFloat64:
+			p := load(ctxptr, code.Idx)
+			v := ptrToFloat64(p + uintptr(code.Offset))
+			if v != 0 {
+				if math.IsInf(v, 0) || math.IsNaN(v) {
+					return nil, errUnsupportedFloat(v)
+				}
+				b = appendStructKey(ctx, code, b)
+				b = appendFloat64(ctx, b, v)
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldFloat64String:
+			p := load(ctxptr, code.Idx)
+			v := ptrToFloat64(p + uintptr(code.Offset))
+			if math.IsInf(v, 0) || math.IsNaN(v) {
+				return nil, errUnsupportedFloat(v)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			b = appendFloat64(ctx, b, v)
+			b = append(b, '"')
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyFloat64String:
+			p := load(ctxptr, code.Idx)
+			v := ptrToFloat64(p + uintptr(code.Offset))
+			if v != 0 {
+				if math.IsInf(v, 0) || math.IsNaN(v) {
+					return nil, errUnsupportedFloat(v)
+				}
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendFloat64(ctx, b, v)
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldFloat64Ptr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			b = appendStructKey(ctx, code, b)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.Next
+				break
+			}
+			v := ptrToFloat64(p)
+			if math.IsInf(v, 0) || math.IsNaN(v) {
+				return nil, errUnsupportedFloat(v)
+			}
+			b = appendFloat64(ctx, b, v)
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyFloat64Ptr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				v := ptrToFloat64(p)
+				if math.IsInf(v, 0) || math.IsNaN(v) {
+					return nil, errUnsupportedFloat(v)
+				}
+				b = appendFloat64(ctx, b, v)
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldFloat64PtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			b = appendStructKey(ctx, code, b)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				v := ptrToFloat64(p)
+				if math.IsInf(v, 0) || math.IsNaN(v) {
+					return nil, errUnsupportedFloat(v)
+				}
+				b = append(b, '"')
+				b = appendFloat64(ctx, b, v)
+				b = append(b, '"')
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyFloat64PtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				v := ptrToFloat64(p)
+				if math.IsInf(v, 0) || math.IsNaN(v) {
+					return nil, errUnsupportedFloat(v)
+				}
+				b = appendFloat64(ctx, b, v)
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldString:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = appendString(ctx, b, ptrToString(p+uintptr(code.Offset)))
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyString:
+			p := load(ctxptr, code.Idx)
+			v := ptrToString(p + uintptr(code.Offset))
+			if v != "" {
+				b = appendStructKey(ctx, code, b)
+				b = appendString(ctx, b, v)
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldStringString:
+			p := load(ctxptr, code.Idx)
+			s := ptrToString(p + uintptr(code.Offset))
+			b = appendStructKey(ctx, code, b)
+			b = appendString(ctx, b, string(appendString(ctx, []byte{}, s)))
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyStringString:
+			p := load(ctxptr, code.Idx)
+			v := ptrToString(p + uintptr(code.Offset))
+			if v != "" {
+				b = appendStructKey(ctx, code, b)
+				b = appendString(ctx, b, string(appendString(ctx, []byte{}, v)))
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldStringPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			b = appendStructKey(ctx, code, b)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendString(ctx, b, ptrToString(p))
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyStringPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendString(ctx, b, ptrToString(p))
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldStringPtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			b = appendStructKey(ctx, code, b)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendString(ctx, b, string(appendString(ctx, []byte{}, ptrToString(p))))
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyStringPtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendString(ctx, b, string(appendString(ctx, []byte{}, ptrToString(p))))
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldBool:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = appendBool(ctx, b, ptrToBool(p+uintptr(code.Offset)))
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyBool:
+			p := load(ctxptr, code.Idx)
+			v := ptrToBool(p + uintptr(code.Offset))
+			if v {
+				b = appendStructKey(ctx, code, b)
+				b = appendBool(ctx, b, v)
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldBoolString:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			b = appendBool(ctx, b, ptrToBool(p+uintptr(code.Offset)))
+			b = append(b, '"')
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyBoolString:
+			p := load(ctxptr, code.Idx)
+			v := ptrToBool(p + uintptr(code.Offset))
+			if v {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendBool(ctx, b, v)
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldBoolPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			b = appendStructKey(ctx, code, b)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendBool(ctx, b, ptrToBool(p))
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyBoolPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendBool(ctx, b, ptrToBool(p))
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldBoolPtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			b = appendStructKey(ctx, code, b)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = append(b, '"')
+				b = appendBool(ctx, b, ptrToBool(p))
+				b = append(b, '"')
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyBoolPtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendBool(ctx, b, ptrToBool(p))
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldBytes:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = appendByteSlice(ctx, b, ptrToBytes(p+uintptr(code.Offset)))
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyBytes:
+			p := load(ctxptr, code.Idx)
+			v := ptrToBytes(p + uintptr(code.Offset))
+			if len(v) > 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendByteSlice(ctx, b, v)
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldBytesPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			b = appendStructKey(ctx, code, b)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendByteSlice(ctx, b, ptrToBytes(p))
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyBytesPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendByteSlice(ctx, b, ptrToBytes(p))
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldNumber:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			bb, err := appendNumber(ctx, b, ptrToNumber(p+uintptr(code.Offset)))
+			if err != nil {
+				return nil, err
+			}
+			b = appendComma(ctx, bb)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyNumber:
+			p := load(ctxptr, code.Idx)
+			v := ptrToNumber(p + uintptr(code.Offset))
+			if v != "" {
+				b = appendStructKey(ctx, code, b)
+				bb, err := appendNumber(ctx, b, v)
+				if err != nil {
+					return nil, err
+				}
+				b = appendComma(ctx, bb)
+			}
+			code = code.Next
+		case encoder.OpStructFieldNumberString:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			bb, err := appendNumber(ctx, b, ptrToNumber(p+uintptr(code.Offset)))
+			if err != nil {
+				return nil, err
+			}
+			b = append(bb, '"')
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyNumberString:
+			p := load(ctxptr, code.Idx)
+			v := ptrToNumber(p + uintptr(code.Offset))
+			if v != "" {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				bb, err := appendNumber(ctx, b, v)
+				if err != nil {
+					return nil, err
+				}
+				b = append(bb, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldNumberPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			b = appendStructKey(ctx, code, b)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				bb, err := appendNumber(ctx, b, ptrToNumber(p))
+				if err != nil {
+					return nil, err
+				}
+				b = bb
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyNumberPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				bb, err := appendNumber(ctx, b, ptrToNumber(p))
+				if err != nil {
+					return nil, err
+				}
+				b = appendComma(ctx, bb)
+			}
+			code = code.Next
+		case encoder.OpStructFieldNumberPtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			b = appendStructKey(ctx, code, b)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = append(b, '"')
+				bb, err := appendNumber(ctx, b, ptrToNumber(p))
+				if err != nil {
+					return nil, err
+				}
+				b = append(bb, '"')
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyNumberPtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				bb, err := appendNumber(ctx, b, ptrToNumber(p))
+				if err != nil {
+					return nil, err
+				}
+				b = append(bb, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldMarshalJSON:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			p += uintptr(code.Offset)
+			if (code.Flags & encoder.IsNilableTypeFlags) != 0 {
+				p = ptrToPtr(p)
+			}
+			if p == 0 && (code.Flags&encoder.NilCheckFlags) != 0 {
+				b = appendNull(ctx, b)
+			} else {
+				bb, err := appendMarshalJSON(ctx, code, b, ptrToInterface(code, p))
+				if err != nil {
+					return nil, err
+				}
+				b = bb
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyMarshalJSON:
+			p := load(ctxptr, code.Idx)
+			p += uintptr(code.Offset)
+			if (code.Flags & encoder.IsNilableTypeFlags) != 0 {
+				p = ptrToPtr(p)
+			}
+			if p == 0 && (code.Flags&encoder.NilCheckFlags) != 0 {
+				code = code.NextField
+				break
+			}
+			iface := ptrToInterface(code, p)
+			if (code.Flags&encoder.NilCheckFlags) != 0 && encoder.IsNilForMarshaler(iface) {
+				code = code.NextField
+				break
+			}
+			b = appendStructKey(ctx, code, b)
+			bb, err := appendMarshalJSON(ctx, code, b, iface)
+			if err != nil {
+				return nil, err
+			}
+			b = appendComma(ctx, bb)
+			code = code.Next
+		case encoder.OpStructFieldMarshalJSONPtr:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				bb, err := appendMarshalJSON(ctx, code, b, ptrToInterface(code, p))
+				if err != nil {
+					return nil, err
+				}
+				b = bb
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyMarshalJSONPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				bb, err := appendMarshalJSON(ctx, code, b, ptrToInterface(code, p))
+				if err != nil {
+					return nil, err
+				}
+				b = appendComma(ctx, bb)
+			}
+			code = code.Next
+		case encoder.OpStructFieldMarshalText:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			p += uintptr(code.Offset)
+			if (code.Flags & encoder.IsNilableTypeFlags) != 0 {
+				p = ptrToPtr(p)
+			}
+			if p == 0 && (code.Flags&encoder.NilCheckFlags) != 0 {
+				b = appendNull(ctx, b)
+			} else {
+				bb, err := appendMarshalText(ctx, code, b, ptrToInterface(code, p))
+				if err != nil {
+					return nil, err
+				}
+				b = bb
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyMarshalText:
+			p := load(ctxptr, code.Idx)
+			p += uintptr(code.Offset)
+			if (code.Flags & encoder.IsNilableTypeFlags) != 0 {
+				p = ptrToPtr(p)
+			}
+			if p == 0 && (code.Flags&encoder.NilCheckFlags) != 0 {
+				code = code.NextField
+				break
+			}
+			b = appendStructKey(ctx, code, b)
+			bb, err := appendMarshalText(ctx, code, b, ptrToInterface(code, p))
+			if err != nil {
+				return nil, err
+			}
+			b = appendComma(ctx, bb)
+			code = code.Next
+		case encoder.OpStructFieldMarshalTextPtr:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				bb, err := appendMarshalText(ctx, code, b, ptrToInterface(code, p))
+				if err != nil {
+					return nil, err
+				}
+				b = bb
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyMarshalTextPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				bb, err := appendMarshalText(ctx, code, b, ptrToInterface(code, p))
+				if err != nil {
+					return nil, err
+				}
+				b = appendComma(ctx, bb)
+			}
+			code = code.Next
+		case encoder.OpStructFieldArray:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p += uintptr(code.Offset)
+			code = code.Next
+			store(ctxptr, code.Idx, p)
+		case encoder.OpStructFieldOmitEmptyArray:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p += uintptr(code.Offset)
+			code = code.Next
+			store(ctxptr, code.Idx, p)
+		case encoder.OpStructFieldArrayPtr:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			code = code.Next
+			store(ctxptr, code.Idx, p)
+		case encoder.OpStructFieldOmitEmptyArrayPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				code = code.Next
+				store(ctxptr, code.Idx, p)
+			} else {
+				code = code.NextField
+			}
+		case encoder.OpStructFieldSlice:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p += uintptr(code.Offset)
+			code = code.Next
+			store(ctxptr, code.Idx, p)
+		case encoder.OpStructFieldOmitEmptySlice:
+			p := load(ctxptr, code.Idx)
+			p += uintptr(code.Offset)
+			slice := ptrToSlice(p)
+			if slice.Len == 0 {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				code = code.Next
+				store(ctxptr, code.Idx, p)
+			}
+		case encoder.OpStructFieldSlicePtr:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			code = code.Next
+			store(ctxptr, code.Idx, p)
+		case encoder.OpStructFieldOmitEmptySlicePtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				code = code.Next
+				store(ctxptr, code.Idx, p)
+			} else {
+				code = code.NextField
+			}
+		case encoder.OpStructFieldMap:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToPtr(p + uintptr(code.Offset))
+			code = code.Next
+			store(ctxptr, code.Idx, p)
+		case encoder.OpStructFieldOmitEmptyMap:
+			p := load(ctxptr, code.Idx)
+			p = ptrToPtr(p + uintptr(code.Offset))
+			if p == 0 || maplen(ptrToUnsafePtr(p)) == 0 {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				code = code.Next
+				store(ctxptr, code.Idx, p)
+			}
+		case encoder.OpStructFieldMapPtr:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToPtr(p + uintptr(code.Offset))
+			if p != 0 {
+				p = ptrToNPtr(p, code.PtrNum)
+			}
+			code = code.Next
+			store(ctxptr, code.Idx, p)
+		case encoder.OpStructFieldOmitEmptyMapPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToPtr(p + uintptr(code.Offset))
+			if p != 0 {
+				p = ptrToNPtr(p, code.PtrNum)
+			}
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				code = code.Next
+				store(ctxptr, code.Idx, p)
+			} else {
+				code = code.NextField
+			}
+		case encoder.OpStructFieldStruct:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p += uintptr(code.Offset)
+			code = code.Next
+			store(ctxptr, code.Idx, p)
+		case encoder.OpStructFieldOmitEmptyStruct:
+			p := load(ctxptr, code.Idx)
+			p += uintptr(code.Offset)
+			if ptrToPtr(p) == 0 && (code.Flags&encoder.IsNextOpPtrTypeFlags) != 0 {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				code = code.Next
+				store(ctxptr, code.Idx, p)
+			}
+		case encoder.OpStructEnd:
+			b = appendStructEndSkipLast(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndInt:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = appendInt(ctx, b, p+uintptr(code.Offset), code)
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyInt:
+			p := load(ctxptr, code.Idx)
+			u64 := ptrToUint64(p+uintptr(code.Offset), code.NumBitSize)
+			v := u64 & ((1 << code.NumBitSize) - 1)
+			if v != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendInt(ctx, b, p+uintptr(code.Offset), code)
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndIntString:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			b = appendInt(ctx, b, p+uintptr(code.Offset), code)
+			b = append(b, '"')
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyIntString:
+			p := load(ctxptr, code.Idx)
+			u64 := ptrToUint64(p+uintptr(code.Offset), code.NumBitSize)
+			v := u64 & ((1 << code.NumBitSize) - 1)
+			if v != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendInt(ctx, b, p+uintptr(code.Offset), code)
+				b = append(b, '"')
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndIntPtr:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendInt(ctx, b, p, code)
+			}
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyIntPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendInt(ctx, b, p, code)
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndIntPtrString:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = append(b, '"')
+				b = appendInt(ctx, b, p, code)
+				b = append(b, '"')
+			}
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyIntPtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendInt(ctx, b, p, code)
+				b = append(b, '"')
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndUint:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = appendUint(ctx, b, p+uintptr(code.Offset), code)
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyUint:
+			p := load(ctxptr, code.Idx)
+			u64 := ptrToUint64(p+uintptr(code.Offset), code.NumBitSize)
+			v := u64 & ((1 << code.NumBitSize) - 1)
+			if v != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendUint(ctx, b, p+uintptr(code.Offset), code)
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndUintString:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			b = appendUint(ctx, b, p+uintptr(code.Offset), code)
+			b = append(b, '"')
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyUintString:
+			p := load(ctxptr, code.Idx)
+			u64 := ptrToUint64(p+uintptr(code.Offset), code.NumBitSize)
+			v := u64 & ((1 << code.NumBitSize) - 1)
+			if v != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendUint(ctx, b, p+uintptr(code.Offset), code)
+				b = append(b, '"')
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndUintPtr:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendUint(ctx, b, p, code)
+			}
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyUintPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendUint(ctx, b, p, code)
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndUintPtrString:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = append(b, '"')
+				b = appendUint(ctx, b, p, code)
+				b = append(b, '"')
+			}
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyUintPtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendUint(ctx, b, p, code)
+				b = append(b, '"')
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndFloat32:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = appendFloat32(ctx, b, ptrToFloat32(p+uintptr(code.Offset)))
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyFloat32:
+			p := load(ctxptr, code.Idx)
+			v := ptrToFloat32(p + uintptr(code.Offset))
+			if v != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendFloat32(ctx, b, v)
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndFloat32String:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			b = appendFloat32(ctx, b, ptrToFloat32(p+uintptr(code.Offset)))
+			b = append(b, '"')
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyFloat32String:
+			p := load(ctxptr, code.Idx)
+			v := ptrToFloat32(p + uintptr(code.Offset))
+			if v != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendFloat32(ctx, b, v)
+				b = append(b, '"')
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndFloat32Ptr:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendFloat32(ctx, b, ptrToFloat32(p))
+			}
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyFloat32Ptr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendFloat32(ctx, b, ptrToFloat32(p))
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndFloat32PtrString:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = append(b, '"')
+				b = appendFloat32(ctx, b, ptrToFloat32(p))
+				b = append(b, '"')
+			}
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyFloat32PtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendFloat32(ctx, b, ptrToFloat32(p))
+				b = append(b, '"')
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndFloat64:
+			p := load(ctxptr, code.Idx)
+			v := ptrToFloat64(p + uintptr(code.Offset))
+			if math.IsInf(v, 0) || math.IsNaN(v) {
+				return nil, errUnsupportedFloat(v)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = appendFloat64(ctx, b, v)
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyFloat64:
+			p := load(ctxptr, code.Idx)
+			v := ptrToFloat64(p + uintptr(code.Offset))
+			if v != 0 {
+				if math.IsInf(v, 0) || math.IsNaN(v) {
+					return nil, errUnsupportedFloat(v)
+				}
+				b = appendStructKey(ctx, code, b)
+				b = appendFloat64(ctx, b, v)
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndFloat64String:
+			p := load(ctxptr, code.Idx)
+			v := ptrToFloat64(p + uintptr(code.Offset))
+			if math.IsInf(v, 0) || math.IsNaN(v) {
+				return nil, errUnsupportedFloat(v)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			b = appendFloat64(ctx, b, v)
+			b = append(b, '"')
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyFloat64String:
+			p := load(ctxptr, code.Idx)
+			v := ptrToFloat64(p + uintptr(code.Offset))
+			if v != 0 {
+				if math.IsInf(v, 0) || math.IsNaN(v) {
+					return nil, errUnsupportedFloat(v)
+				}
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendFloat64(ctx, b, v)
+				b = append(b, '"')
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndFloat64Ptr:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+				b = appendStructEnd(ctx, code, b)
+				code = code.Next
+				break
+			}
+			v := ptrToFloat64(p)
+			if math.IsInf(v, 0) || math.IsNaN(v) {
+				return nil, errUnsupportedFloat(v)
+			}
+			b = appendFloat64(ctx, b, v)
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyFloat64Ptr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				v := ptrToFloat64(p)
+				if math.IsInf(v, 0) || math.IsNaN(v) {
+					return nil, errUnsupportedFloat(v)
+				}
+				b = appendFloat64(ctx, b, v)
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndFloat64PtrString:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = append(b, '"')
+				v := ptrToFloat64(p)
+				if math.IsInf(v, 0) || math.IsNaN(v) {
+					return nil, errUnsupportedFloat(v)
+				}
+				b = appendFloat64(ctx, b, v)
+				b = append(b, '"')
+			}
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyFloat64PtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				v := ptrToFloat64(p)
+				if math.IsInf(v, 0) || math.IsNaN(v) {
+					return nil, errUnsupportedFloat(v)
+				}
+				b = append(b, '"')
+				b = appendFloat64(ctx, b, v)
+				b = append(b, '"')
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndString:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = appendString(ctx, b, ptrToString(p+uintptr(code.Offset)))
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyString:
+			p := load(ctxptr, code.Idx)
+			v := ptrToString(p + uintptr(code.Offset))
+			if v != "" {
+				b = appendStructKey(ctx, code, b)
+				b = appendString(ctx, b, v)
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndStringString:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			s := ptrToString(p + uintptr(code.Offset))
+			b = appendString(ctx, b, string(appendString(ctx, []byte{}, s)))
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyStringString:
+			p := load(ctxptr, code.Idx)
+			v := ptrToString(p + uintptr(code.Offset))
+			if v != "" {
+				b = appendStructKey(ctx, code, b)
+				b = appendString(ctx, b, string(appendString(ctx, []byte{}, v)))
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndStringPtr:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendString(ctx, b, ptrToString(p))
+			}
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyStringPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendString(ctx, b, ptrToString(p))
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndStringPtrString:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendString(ctx, b, string(appendString(ctx, []byte{}, ptrToString(p))))
+			}
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyStringPtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendString(ctx, b, string(appendString(ctx, []byte{}, ptrToString(p))))
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndBool:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = appendBool(ctx, b, ptrToBool(p+uintptr(code.Offset)))
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyBool:
+			p := load(ctxptr, code.Idx)
+			v := ptrToBool(p + uintptr(code.Offset))
+			if v {
+				b = appendStructKey(ctx, code, b)
+				b = appendBool(ctx, b, v)
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndBoolString:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			b = appendBool(ctx, b, ptrToBool(p+uintptr(code.Offset)))
+			b = append(b, '"')
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyBoolString:
+			p := load(ctxptr, code.Idx)
+			v := ptrToBool(p + uintptr(code.Offset))
+			if v {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendBool(ctx, b, v)
+				b = append(b, '"')
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndBoolPtr:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendBool(ctx, b, ptrToBool(p))
+			}
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyBoolPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendBool(ctx, b, ptrToBool(p))
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndBoolPtrString:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = append(b, '"')
+				b = appendBool(ctx, b, ptrToBool(p))
+				b = append(b, '"')
+			}
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyBoolPtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendBool(ctx, b, ptrToBool(p))
+				b = append(b, '"')
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndBytes:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = appendByteSlice(ctx, b, ptrToBytes(p+uintptr(code.Offset)))
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyBytes:
+			p := load(ctxptr, code.Idx)
+			v := ptrToBytes(p + uintptr(code.Offset))
+			if len(v) > 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendByteSlice(ctx, b, v)
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndBytesPtr:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendByteSlice(ctx, b, ptrToBytes(p))
+			}
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyBytesPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendByteSlice(ctx, b, ptrToBytes(p))
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndNumber:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			bb, err := appendNumber(ctx, b, ptrToNumber(p+uintptr(code.Offset)))
+			if err != nil {
+				return nil, err
+			}
+			b = appendStructEnd(ctx, code, bb)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyNumber:
+			p := load(ctxptr, code.Idx)
+			v := ptrToNumber(p + uintptr(code.Offset))
+			if v != "" {
+				b = appendStructKey(ctx, code, b)
+				bb, err := appendNumber(ctx, b, v)
+				if err != nil {
+					return nil, err
+				}
+				b = appendStructEnd(ctx, code, bb)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndNumberString:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			bb, err := appendNumber(ctx, b, ptrToNumber(p+uintptr(code.Offset)))
+			if err != nil {
+				return nil, err
+			}
+			b = append(bb, '"')
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyNumberString:
+			p := load(ctxptr, code.Idx)
+			v := ptrToNumber(p + uintptr(code.Offset))
+			if v != "" {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				bb, err := appendNumber(ctx, b, v)
+				if err != nil {
+					return nil, err
+				}
+				b = append(bb, '"')
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndNumberPtr:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				bb, err := appendNumber(ctx, b, ptrToNumber(p))
+				if err != nil {
+					return nil, err
+				}
+				b = bb
+			}
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyNumberPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				bb, err := appendNumber(ctx, b, ptrToNumber(p))
+				if err != nil {
+					return nil, err
+				}
+				b = appendStructEnd(ctx, code, bb)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndNumberPtrString:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = append(b, '"')
+				bb, err := appendNumber(ctx, b, ptrToNumber(p))
+				if err != nil {
+					return nil, err
+				}
+				b = append(bb, '"')
+			}
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyNumberPtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				bb, err := appendNumber(ctx, b, ptrToNumber(p))
+				if err != nil {
+					return nil, err
+				}
+				b = append(bb, '"')
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpEnd:
+			goto END
+		}
+	}
+END:
+	return b, nil
+}
diff --git a/vendor/github.com/goccy/go-json/internal/encoder/vm_indent/debug_vm.go b/vendor/github.com/goccy/go-json/internal/encoder/vm_indent/debug_vm.go
new file mode 100644
index 00000000..99395388
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/internal/encoder/vm_indent/debug_vm.go
@@ -0,0 +1,35 @@
+package vm_indent
+
+import (
+	"fmt"
+
+	"github.com/goccy/go-json/internal/encoder"
+)
+
+func DebugRun(ctx *encoder.RuntimeContext, b []byte, codeSet *encoder.OpcodeSet) ([]byte, error) { // DebugRun calls Run; if Run panics it dumps debug info to ctx.Option.DebugOut and re-panics.
+	var code *encoder.Opcode
+	if (ctx.Option.Flag & encoder.HTMLEscapeOption) != 0 { // pick the opcode chain by the HTML-escape flag; here it is only used for the dump below
+		code = codeSet.EscapeKeyCode
+	} else {
+		code = codeSet.NoescapeKeyCode
+	}
+
+	defer func() {
+		if err := recover(); err != nil { // err is the recovered panic value, not an error returned by Run
+			w := ctx.Option.DebugOut
+			fmt.Fprintln(w, "=============[DEBUG]===============")
+			fmt.Fprintln(w, "* [TYPE]")
+			fmt.Fprintln(w, codeSet.Type)
+			fmt.Fprintf(w, "\n")
+			fmt.Fprintln(w, "* [ALL OPCODE]")
+			fmt.Fprintln(w, code.Dump())
+			fmt.Fprintf(w, "\n")
+			fmt.Fprintln(w, "* [CONTEXT]")
+			fmt.Fprintf(w, "%+v\n", ctx)
+			fmt.Fprintln(w, "===================================")
+			panic(err) // re-raise so the caller still observes the original panic
+		}
+	}()
+
+	return Run(ctx, b, codeSet)
+}
diff --git a/vendor/github.com/goccy/go-json/internal/encoder/vm_indent/hack.go b/vendor/github.com/goccy/go-json/internal/encoder/vm_indent/hack.go
new file mode 100644
index 00000000..9e245bfe
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/internal/encoder/vm_indent/hack.go
@@ -0,0 +1,9 @@
+package vm_indent
+
+import (
+	// HACK: compile order
+	// The `vm`, `vm_indent`, `vm_color`, and `vm_color_indent` packages use a lot of memory to compile,
+	// so we force a dependency chain between them to keep them from being compiled concurrently.
+	// dependency order: vm => vm_indent => vm_color => vm_color_indent
+	_ "github.com/goccy/go-json/internal/encoder/vm_color"
+)
diff --git a/vendor/github.com/goccy/go-json/internal/encoder/vm_indent/util.go b/vendor/github.com/goccy/go-json/internal/encoder/vm_indent/util.go
new file mode 100644
index 00000000..6cb745e3
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/internal/encoder/vm_indent/util.go
@@ -0,0 +1,230 @@
+package vm_indent
+
+import (
+	"encoding/json"
+	"fmt"
+	"unsafe"
+
+	"github.com/goccy/go-json/internal/encoder"
+	"github.com/goccy/go-json/internal/runtime"
+)
+
+const uintptrSize = 4 << (^uintptr(0) >> 63) // size of a uintptr in bytes: 4 when uintptr is 32 bits wide, 8 when it is 64 bits wide
+
+var ( // package-local aliases for helpers exported by the encoder package
+	appendInt           = encoder.AppendInt
+	appendUint          = encoder.AppendUint
+	appendFloat32       = encoder.AppendFloat32
+	appendFloat64       = encoder.AppendFloat64
+	appendString        = encoder.AppendString
+	appendByteSlice     = encoder.AppendByteSlice
+	appendNumber        = encoder.AppendNumber
+	appendStructEnd     = encoder.AppendStructEndIndent // note: bound to the Indent variant
+	appendIndent        = encoder.AppendIndent
+	errUnsupportedValue = encoder.ErrUnsupportedValue
+	errUnsupportedFloat = encoder.ErrUnsupportedFloat
+	mapiterinit         = encoder.MapIterInit
+	mapiterkey          = encoder.MapIterKey
+	mapitervalue        = encoder.MapIterValue
+	mapiternext         = encoder.MapIterNext
+	maplen              = encoder.MapLen
+)
+
+type emptyInterface struct { // (typ, ptr) pair that is reinterpreted as an interface{} value through unsafe casts (see ptrToInterface)
+	typ *runtime.Type
+	ptr unsafe.Pointer
+}
+
+type nonEmptyInterface struct { // overlaid on interface values when NonEmptyInterfaceFlags is set (see the OpInterface case in Run)
+	itab *struct {
+		ityp *runtime.Type // static interface type
+		typ  *runtime.Type // dynamic concrete type
+		// unused fields...
+	}
+	ptr unsafe.Pointer
+}
+
+func errUnimplementedOp(op encoder.OpType) error { // error returned from Run's default case when an opcode has no handler
+	return fmt.Errorf("encoder (indent): opcode %s has not been implemented", op)
+}
+
+func load(base uintptr, idx uint32) uintptr { // returns the uintptr stored at address base+idx
+	addr := base + uintptr(idx)
+	return **(**uintptr)(unsafe.Pointer(&addr)) // treat the value of addr as a *uintptr (via &addr) and dereference it
+}
+
+func store(base uintptr, idx uint32, p uintptr) { // writes p into the uintptr slot at address base+idx
+	addr := base + uintptr(idx)
+	**(**uintptr)(unsafe.Pointer(&addr)) = p // treat the value of addr as a *uintptr (via &addr) and assign through it
+}
+
+func loadNPtr(base uintptr, idx uint32, ptrNum uint8) uintptr {
+	// Read the word stored at base+idx, then follow up to ptrNum levels
+	// of indirection from it. The walk stops early at a nil pointer,
+	// in which case the result is 0.
+	slot := base + uintptr(idx)
+	cur := **(**uintptr)(unsafe.Pointer(&slot))
+	for hops := ptrNum; hops > 0 && cur != 0; hops-- {
+		cur = ptrToPtr(cur)
+	}
+	return cur
+}
+
+func ptrToUint64(p uintptr, bitSize uint8) uint64 { // reads an unsigned integer of bitSize bits at address p and widens it to uint64
+	switch bitSize {
+	case 8:
+		return (uint64)(**(**uint8)(unsafe.Pointer(&p)))
+	case 16:
+		return (uint64)(**(**uint16)(unsafe.Pointer(&p)))
+	case 32:
+		return (uint64)(**(**uint32)(unsafe.Pointer(&p)))
+	case 64:
+		return **(**uint64)(unsafe.Pointer(&p))
+	}
+	return 0 // any bitSize other than 8, 16, 32 or 64 yields 0
+}
+func ptrToFloat32(p uintptr) float32            { return **(**float32)(unsafe.Pointer(&p)) } // float32 stored at address p
+func ptrToFloat64(p uintptr) float64            { return **(**float64)(unsafe.Pointer(&p)) } // float64 stored at address p
+func ptrToBool(p uintptr) bool                  { return **(**bool)(unsafe.Pointer(&p)) } // bool stored at address p
+func ptrToBytes(p uintptr) []byte               { return **(**[]byte)(unsafe.Pointer(&p)) } // []byte value stored at address p
+func ptrToNumber(p uintptr) json.Number         { return **(**json.Number)(unsafe.Pointer(&p)) } // json.Number stored at address p
+func ptrToString(p uintptr) string              { return **(**string)(unsafe.Pointer(&p)) } // string value stored at address p
+func ptrToSlice(p uintptr) *runtime.SliceHeader { return *(**runtime.SliceHeader)(unsafe.Pointer(&p)) } // views address p as a *runtime.SliceHeader; the header is not copied
+func ptrToPtr(p uintptr) uintptr { // returns the pointer stored at address p, as a uintptr
+	return uintptr(**(**unsafe.Pointer)(unsafe.Pointer(&p)))
+}
+func ptrToNPtr(p uintptr, ptrNum uint8) uintptr {
+	// Follow up to ptrNum levels of indirection starting at p.
+	// The walk stops early once a nil pointer is reached, in which
+	// case that nil (0) is the result.
+	for hops := ptrNum; hops > 0 && p != 0; hops-- {
+		p = ptrToPtr(p)
+	}
+	return p
+}
+
+func ptrToUnsafePtr(p uintptr) unsafe.Pointer { // reinterprets the value of p itself as an unsafe.Pointer; p is not dereferenced
+	return *(*unsafe.Pointer)(unsafe.Pointer(&p))
+}
+func ptrToInterface(code *encoder.Opcode, p uintptr) interface{} { // builds an interface{} by reinterpreting an emptyInterface{typ: code.Type, ptr: p}
+	return *(*interface{})(unsafe.Pointer(&emptyInterface{
+		typ: code.Type,
+		ptr: *(*unsafe.Pointer)(unsafe.Pointer(&p)), // p reinterpreted as unsafe.Pointer, not dereferenced
+	}))
+}
+
+func appendBool(_ *encoder.RuntimeContext, b []byte, v bool) []byte { // appends the JSON literal for v ("true" or "false")
+	if !v {
+		return append(b, "false"...)
+	}
+	return append(b, "true"...)
+}
+
+func appendNull(_ *encoder.RuntimeContext, b []byte) []byte { // appends the JSON literal null, with no separator after it
+	return append(b, "null"...)
+}
+
+func appendComma(_ *encoder.RuntimeContext, b []byte) []byte { // appends the separator used by this package: a comma followed by a newline
+	return append(b, ",\n"...)
+}
+
+func appendNullComma(_ *encoder.RuntimeContext, b []byte) []byte { // appends null followed by the comma-newline separator
+	return append(b, "null,\n"...)
+}
+
+func appendColon(_ *encoder.RuntimeContext, b []byte) []byte { // replaces the last two bytes of b with a colon and a space
+	return append(b[:len(b)-2], ": "...)
+}
+
+func appendMapKeyValue(ctx *encoder.RuntimeContext, code *encoder.Opcode, b, key, value []byte) []byte {
+	// Emit one map entry: indent at code.Indent+1, then key, then value.
+	b = append(appendIndent(ctx, b, code.Indent+1), key...)
+	end := len(b)
+	b[end-2], b[end-1] = ':', ' ' // the last two bytes written so far become ": "
+	return append(b, value...)
+}
+
+func appendMapEnd(ctx *encoder.RuntimeContext, code *encoder.Opcode, b []byte) []byte {
+	// Swap the last two bytes of b for a newline, indent to code.Indent, then close with "},\n".
+	trimmed := append(b[:len(b)-2], '\n')
+	indented := appendIndent(ctx, trimmed, code.Indent)
+	return append(indented, "},\n"...)
+}
+
+func appendArrayHead(ctx *encoder.RuntimeContext, code *encoder.Opcode, b []byte) []byte { // opens an array with "[\n" and indents at code.Indent+1
+	opened := append(b, "[\n"...)
+	return appendIndent(ctx, opened, code.Indent+1)
+}
+
+func appendArrayEnd(ctx *encoder.RuntimeContext, code *encoder.Opcode, b []byte) []byte {
+	// Swap the last two bytes of b for a newline, indent to code.Indent, then close with "],\n".
+	trimmed := append(b[:len(b)-2], '\n')
+	indented := appendIndent(ctx, trimmed, code.Indent)
+	return append(indented, "],\n"...)
+}
+
+func appendEmptyArray(_ *encoder.RuntimeContext, b []byte) []byte { // appends an empty array followed by the comma-newline separator
+	return append(b, "[],\n"...)
+}
+
+func appendEmptyObject(_ *encoder.RuntimeContext, b []byte) []byte { // appends an empty object followed by the comma-newline separator
+	return append(b, "{},\n"...)
+}
+
+func appendObjectEnd(ctx *encoder.RuntimeContext, code *encoder.Opcode, b []byte) []byte { // closes an object: newline, indent at code.Indent, then "},\n"
+	last := len(b) - 1
+	// replace the comma (second-to-last byte) with a newline
+	b[last-1] = '\n'
+	b = appendIndent(ctx, b[:last], code.Indent) // b[:last] drops the final byte, so the buffer now ends with that newline
+	return append(b, '}', ',', '\n')
+}
+
+func appendMarshalJSON(ctx *encoder.RuntimeContext, code *encoder.Opcode, b []byte, v interface{}) ([]byte, error) { // delegates to encoder.AppendMarshalJSONIndent
+	return encoder.AppendMarshalJSONIndent(ctx, code, b, v)
+}
+
+func appendMarshalText(ctx *encoder.RuntimeContext, code *encoder.Opcode, b []byte, v interface{}) ([]byte, error) { // delegates to encoder.AppendMarshalTextIndent
+	return encoder.AppendMarshalTextIndent(ctx, code, b, v)
+}
+
+func appendStructHead(_ *encoder.RuntimeContext, b []byte) []byte { // opens an object: an opening brace followed by a newline
+	return append(b, "{\n"...)
+}
+
+func appendStructKey(ctx *encoder.RuntimeContext, code *encoder.Opcode, b []byte) []byte { // appends indent at code.Indent, then code.Key, then one space
+	indented := appendIndent(ctx, b, code.Indent)
+	withKey := append(indented, code.Key...)
+	return append(withKey, ' ')
+}
+
+func appendStructEndSkipLast(ctx *encoder.RuntimeContext, code *encoder.Opcode, b []byte) []byte { // closes a struct without writing a final field, then appends the comma-newline separator
+	last := len(b) - 1
+	if b[last-1] == '{' { // opening brace is second-to-last: overwrite the final byte so the buffer ends with "{}"
+		b[last] = '}'
+	} else {
+		if b[last] == '\n' {
+			// to remove ',' and '\n' characters
+			b = b[:len(b)-2]
+		}
+		b = append(b, '\n')
+		b = appendIndent(ctx, b, code.Indent-1) // the closing brace is indented at code.Indent-1
+		b = append(b, '}')
+	}
+	return appendComma(ctx, b)
+}
+
+func restoreIndent(ctx *encoder.RuntimeContext, code *encoder.Opcode, ctxptr uintptr) { // sets ctx.BaseIndent from the slot at ctxptr+code.Length (the slot storeIndent writes)
+	ctx.BaseIndent = uint32(load(ctxptr, code.Length))
+}
+
+func storeIndent(ctxptr uintptr, code *encoder.Opcode, indent uintptr) { // saves indent in the slot at ctxptr+code.Length (the slot restoreIndent reads)
+	store(ctxptr, code.Length, indent)
+}
+
+func appendArrayElemIndent(ctx *encoder.RuntimeContext, code *encoder.Opcode, b []byte) []byte { // indents at code.Indent+1, i.e. one level deeper than code
+	return appendIndent(ctx, b, code.Indent+1)
+}
+
+func appendMapKeyIndent(ctx *encoder.RuntimeContext, code *encoder.Opcode, b []byte) []byte { // indents at exactly code.Indent (no +1, unlike appendArrayElemIndent)
+	return appendIndent(ctx, b, code.Indent)
+}
diff --git a/vendor/github.com/goccy/go-json/internal/encoder/vm_indent/vm.go b/vendor/github.com/goccy/go-json/internal/encoder/vm_indent/vm.go
new file mode 100644
index 00000000..836c5c8a
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/internal/encoder/vm_indent/vm.go
@@ -0,0 +1,4859 @@
+// Code generated by internal/cmd/generator. DO NOT EDIT!
+package vm_indent
+
+import (
+	"math"
+	"reflect"
+	"sort"
+	"unsafe"
+
+	"github.com/goccy/go-json/internal/encoder"
+	"github.com/goccy/go-json/internal/runtime"
+)
+
+func Run(ctx *encoder.RuntimeContext, b []byte, codeSet *encoder.OpcodeSet) ([]byte, error) {
+	recursiveLevel := 0
+	ptrOffset := uintptr(0)
+	ctxptr := ctx.Ptr()
+	var code *encoder.Opcode
+	if (ctx.Option.Flag & encoder.HTMLEscapeOption) != 0 {
+		code = codeSet.EscapeKeyCode
+	} else {
+		code = codeSet.NoescapeKeyCode
+	}
+
+	for {
+		switch code.Op {
+		default:
+			return nil, errUnimplementedOp(code.Op)
+		case encoder.OpPtr:
+			p := load(ctxptr, code.Idx)
+			code = code.Next
+			store(ctxptr, code.Idx, ptrToPtr(p))
+		case encoder.OpIntPtr:
+			p := loadNPtr(ctxptr, code.Idx, code.PtrNum)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.Next
+				break
+			}
+			store(ctxptr, code.Idx, p)
+			fallthrough
+		case encoder.OpInt:
+			b = appendInt(ctx, b, load(ctxptr, code.Idx), code)
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpUintPtr:
+			p := loadNPtr(ctxptr, code.Idx, code.PtrNum)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.Next
+				break
+			}
+			store(ctxptr, code.Idx, p)
+			fallthrough
+		case encoder.OpUint:
+			b = appendUint(ctx, b, load(ctxptr, code.Idx), code)
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpIntString:
+			b = append(b, '"')
+			b = appendInt(ctx, b, load(ctxptr, code.Idx), code)
+			b = append(b, '"')
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpUintString:
+			b = append(b, '"')
+			b = appendUint(ctx, b, load(ctxptr, code.Idx), code)
+			b = append(b, '"')
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpFloat32Ptr:
+			p := loadNPtr(ctxptr, code.Idx, code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+				b = appendComma(ctx, b)
+				code = code.Next
+				break
+			}
+			store(ctxptr, code.Idx, p)
+			fallthrough
+		case encoder.OpFloat32:
+			b = appendFloat32(ctx, b, ptrToFloat32(load(ctxptr, code.Idx)))
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpFloat64Ptr:
+			p := loadNPtr(ctxptr, code.Idx, code.PtrNum)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.Next
+				break
+			}
+			store(ctxptr, code.Idx, p)
+			fallthrough
+		case encoder.OpFloat64:
+			v := ptrToFloat64(load(ctxptr, code.Idx))
+			if math.IsInf(v, 0) || math.IsNaN(v) {
+				return nil, errUnsupportedFloat(v)
+			}
+			b = appendFloat64(ctx, b, v)
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStringPtr:
+			p := loadNPtr(ctxptr, code.Idx, code.PtrNum)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.Next
+				break
+			}
+			store(ctxptr, code.Idx, p)
+			fallthrough
+		case encoder.OpString:
+			b = appendString(ctx, b, ptrToString(load(ctxptr, code.Idx)))
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpBoolPtr:
+			p := loadNPtr(ctxptr, code.Idx, code.PtrNum)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.Next
+				break
+			}
+			store(ctxptr, code.Idx, p)
+			fallthrough
+		case encoder.OpBool:
+			b = appendBool(ctx, b, ptrToBool(load(ctxptr, code.Idx)))
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpBytesPtr:
+			p := loadNPtr(ctxptr, code.Idx, code.PtrNum)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.Next
+				break
+			}
+			store(ctxptr, code.Idx, p)
+			fallthrough
+		case encoder.OpBytes:
+			b = appendByteSlice(ctx, b, ptrToBytes(load(ctxptr, code.Idx)))
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpNumberPtr:
+			p := loadNPtr(ctxptr, code.Idx, code.PtrNum)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.Next
+				break
+			}
+			store(ctxptr, code.Idx, p)
+			fallthrough
+		case encoder.OpNumber:
+			bb, err := appendNumber(ctx, b, ptrToNumber(load(ctxptr, code.Idx)))
+			if err != nil {
+				return nil, err
+			}
+			b = appendComma(ctx, bb)
+			code = code.Next
+		case encoder.OpInterfacePtr:
+			p := loadNPtr(ctxptr, code.Idx, code.PtrNum)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.Next
+				break
+			}
+			store(ctxptr, code.Idx, p)
+			fallthrough
+		case encoder.OpInterface:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.Next
+				break
+			}
+			if recursiveLevel > encoder.StartDetectingCyclesAfter {
+				for _, seen := range ctx.SeenPtr {
+					if p == seen {
+						return nil, errUnsupportedValue(code, p)
+					}
+				}
+			}
+			ctx.SeenPtr = append(ctx.SeenPtr, p)
+			var (
+				typ      *runtime.Type
+				ifacePtr unsafe.Pointer
+			)
+			up := ptrToUnsafePtr(p)
+			if code.Flags&encoder.NonEmptyInterfaceFlags != 0 {
+				iface := (*nonEmptyInterface)(up)
+				ifacePtr = iface.ptr
+				if iface.itab != nil {
+					typ = iface.itab.typ
+				}
+			} else {
+				iface := (*emptyInterface)(up)
+				ifacePtr = iface.ptr
+				typ = iface.typ
+			}
+			if ifacePtr == nil {
+				isDirectedNil := typ != nil && typ.Kind() == reflect.Struct && !runtime.IfaceIndir(typ)
+				if !isDirectedNil {
+					b = appendNullComma(ctx, b)
+					code = code.Next
+					break
+				}
+			}
+			ctx.KeepRefs = append(ctx.KeepRefs, up)
+			ifaceCodeSet, err := encoder.CompileToGetCodeSet(ctx, uintptr(unsafe.Pointer(typ)))
+			if err != nil {
+				return nil, err
+			}
+
+			totalLength := uintptr(code.Length) + 3
+			nextTotalLength := uintptr(ifaceCodeSet.CodeLength) + 3
+
+			var c *encoder.Opcode
+			if (ctx.Option.Flag & encoder.HTMLEscapeOption) != 0 {
+				c = ifaceCodeSet.InterfaceEscapeKeyCode
+			} else {
+				c = ifaceCodeSet.InterfaceNoescapeKeyCode
+			}
+			curlen := uintptr(len(ctx.Ptrs))
+			offsetNum := ptrOffset / uintptrSize
+			oldOffset := ptrOffset
+			ptrOffset += totalLength * uintptrSize
+			oldBaseIndent := ctx.BaseIndent
+			ctx.BaseIndent += code.Indent
+
+			newLen := offsetNum + totalLength + nextTotalLength
+			if curlen < newLen {
+				ctx.Ptrs = append(ctx.Ptrs, make([]uintptr, newLen-curlen)...)
+			}
+			ctxptr = ctx.Ptr() + ptrOffset // assign new ctxptr
+
+			end := ifaceCodeSet.EndCode
+			store(ctxptr, c.Idx, uintptr(ifacePtr))
+			store(ctxptr, end.Idx, oldOffset)
+			store(ctxptr, end.ElemIdx, uintptr(unsafe.Pointer(code.Next)))
+			storeIndent(ctxptr, end, uintptr(oldBaseIndent))
+			code = c
+			recursiveLevel++
+		case encoder.OpInterfaceEnd:
+			recursiveLevel--
+
+			// restore ctxptr
+			offset := load(ctxptr, code.Idx)
+			restoreIndent(ctx, code, ctxptr)
+			ctx.SeenPtr = ctx.SeenPtr[:len(ctx.SeenPtr)-1]
+
+			codePtr := load(ctxptr, code.ElemIdx)
+			code = (*encoder.Opcode)(ptrToUnsafePtr(codePtr))
+			ctxptr = ctx.Ptr() + offset
+			ptrOffset = offset
+		case encoder.OpMarshalJSONPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToPtr(p))
+			fallthrough
+		case encoder.OpMarshalJSON:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.Next
+				break
+			}
+			if (code.Flags&encoder.IsNilableTypeFlags) != 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				p = ptrToPtr(p)
+			}
+			bb, err := appendMarshalJSON(ctx, code, b, ptrToInterface(code, p))
+			if err != nil {
+				return nil, err
+			}
+			b = appendComma(ctx, bb)
+			code = code.Next
+		case encoder.OpMarshalTextPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToPtr(p))
+			fallthrough
+		case encoder.OpMarshalText:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				b = append(b, `""`...)
+				b = appendComma(ctx, b)
+				code = code.Next
+				break
+			}
+			if (code.Flags&encoder.IsNilableTypeFlags) != 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				p = ptrToPtr(p)
+			}
+			bb, err := appendMarshalText(ctx, code, b, ptrToInterface(code, p))
+			if err != nil {
+				return nil, err
+			}
+			b = appendComma(ctx, bb)
+			code = code.Next
+		case encoder.OpSlicePtr:
+			p := loadNPtr(ctxptr, code.Idx, code.PtrNum)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, p)
+			fallthrough
+		case encoder.OpSlice:
+			p := load(ctxptr, code.Idx)
+			slice := ptrToSlice(p)
+			if p == 0 || slice.Data == nil {
+				b = appendNullComma(ctx, b)
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.ElemIdx, 0)
+			store(ctxptr, code.Length, uintptr(slice.Len))
+			store(ctxptr, code.Idx, uintptr(slice.Data))
+			if slice.Len > 0 {
+				b = appendArrayHead(ctx, code, b)
+				code = code.Next
+				store(ctxptr, code.Idx, uintptr(slice.Data))
+			} else {
+				b = appendEmptyArray(ctx, b)
+				code = code.End.Next
+			}
+		case encoder.OpSliceElem:
+			idx := load(ctxptr, code.ElemIdx)
+			length := load(ctxptr, code.Length)
+			idx++
+			if idx < length {
+				b = appendArrayElemIndent(ctx, code, b)
+				store(ctxptr, code.ElemIdx, idx)
+				data := load(ctxptr, code.Idx)
+				size := uintptr(code.Size)
+				code = code.Next
+				store(ctxptr, code.Idx, data+idx*size)
+			} else {
+				b = appendArrayEnd(ctx, code, b)
+				code = code.End.Next
+			}
+		case encoder.OpArrayPtr:
+			p := loadNPtr(ctxptr, code.Idx, code.PtrNum)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, p)
+			fallthrough
+		case encoder.OpArray:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.End.Next
+				break
+			}
+			if code.Length > 0 {
+				b = appendArrayHead(ctx, code, b)
+				store(ctxptr, code.ElemIdx, 0)
+				code = code.Next
+				store(ctxptr, code.Idx, p)
+			} else {
+				b = appendEmptyArray(ctx, b)
+				code = code.End.Next
+			}
+		case encoder.OpArrayElem:
+			idx := load(ctxptr, code.ElemIdx)
+			idx++
+			if idx < uintptr(code.Length) {
+				b = appendArrayElemIndent(ctx, code, b)
+				store(ctxptr, code.ElemIdx, idx)
+				p := load(ctxptr, code.Idx)
+				size := uintptr(code.Size)
+				code = code.Next
+				store(ctxptr, code.Idx, p+idx*size)
+			} else {
+				b = appendArrayEnd(ctx, code, b)
+				code = code.End.Next
+			}
+		case encoder.OpMapPtr:
+			p := loadNPtr(ctxptr, code.Idx, code.PtrNum)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, p)
+			fallthrough
+		case encoder.OpMap:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.End.Next
+				break
+			}
+			uptr := ptrToUnsafePtr(p)
+			mlen := maplen(uptr)
+			if mlen <= 0 {
+				b = appendEmptyObject(ctx, b)
+				code = code.End.Next
+				break
+			}
+			b = appendStructHead(ctx, b)
+			unorderedMap := (ctx.Option.Flag & encoder.UnorderedMapOption) != 0
+			mapCtx := encoder.NewMapContext(mlen, unorderedMap)
+			mapiterinit(code.Type, uptr, &mapCtx.Iter)
+			store(ctxptr, code.Idx, uintptr(unsafe.Pointer(mapCtx)))
+			ctx.KeepRefs = append(ctx.KeepRefs, unsafe.Pointer(mapCtx))
+			if unorderedMap {
+				b = appendMapKeyIndent(ctx, code.Next, b)
+			} else {
+				mapCtx.Start = len(b)
+				mapCtx.First = len(b)
+			}
+			key := mapiterkey(&mapCtx.Iter)
+			store(ctxptr, code.Next.Idx, uintptr(key))
+			code = code.Next
+		case encoder.OpMapKey:
+			mapCtx := (*encoder.MapContext)(ptrToUnsafePtr(load(ctxptr, code.Idx)))
+			idx := mapCtx.Idx
+			idx++
+			if (ctx.Option.Flag & encoder.UnorderedMapOption) != 0 {
+				if idx < mapCtx.Len {
+					b = appendMapKeyIndent(ctx, code, b)
+					mapCtx.Idx = int(idx)
+					key := mapiterkey(&mapCtx.Iter)
+					store(ctxptr, code.Next.Idx, uintptr(key))
+					code = code.Next
+				} else {
+					b = appendObjectEnd(ctx, code, b)
+					encoder.ReleaseMapContext(mapCtx)
+					code = code.End.Next
+				}
+			} else {
+				mapCtx.Slice.Items[mapCtx.Idx].Value = b[mapCtx.Start:len(b)]
+				if idx < mapCtx.Len {
+					mapCtx.Idx = int(idx)
+					mapCtx.Start = len(b)
+					key := mapiterkey(&mapCtx.Iter)
+					store(ctxptr, code.Next.Idx, uintptr(key))
+					code = code.Next
+				} else {
+					code = code.End
+				}
+			}
+		case encoder.OpMapValue:
+			mapCtx := (*encoder.MapContext)(ptrToUnsafePtr(load(ctxptr, code.Idx)))
+			if (ctx.Option.Flag & encoder.UnorderedMapOption) != 0 {
+				b = appendColon(ctx, b)
+			} else {
+				mapCtx.Slice.Items[mapCtx.Idx].Key = b[mapCtx.Start:len(b)]
+				mapCtx.Start = len(b)
+			}
+			value := mapitervalue(&mapCtx.Iter)
+			store(ctxptr, code.Next.Idx, uintptr(value))
+			mapiternext(&mapCtx.Iter)
+			code = code.Next
+		case encoder.OpMapEnd:
+			// this operation only used by sorted map.
+			mapCtx := (*encoder.MapContext)(ptrToUnsafePtr(load(ctxptr, code.Idx)))
+			sort.Sort(mapCtx.Slice)
+			buf := mapCtx.Buf
+			for _, item := range mapCtx.Slice.Items {
+				buf = appendMapKeyValue(ctx, code, buf, item.Key, item.Value)
+			}
+			buf = appendMapEnd(ctx, code, buf)
+			b = b[:mapCtx.First]
+			b = append(b, buf...)
+			mapCtx.Buf = buf
+			encoder.ReleaseMapContext(mapCtx)
+			code = code.Next
+		case encoder.OpRecursivePtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				code = code.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpRecursive:
+			ptr := load(ctxptr, code.Idx)
+			if ptr != 0 {
+				if recursiveLevel > encoder.StartDetectingCyclesAfter {
+					for _, seen := range ctx.SeenPtr {
+						if ptr == seen {
+							return nil, errUnsupportedValue(code, ptr)
+						}
+					}
+				}
+			}
+			ctx.SeenPtr = append(ctx.SeenPtr, ptr)
+			c := code.Jmp.Code
+			curlen := uintptr(len(ctx.Ptrs))
+			offsetNum := ptrOffset / uintptrSize
+			oldOffset := ptrOffset
+			ptrOffset += code.Jmp.CurLen * uintptrSize
+			oldBaseIndent := ctx.BaseIndent
+			indentDiffFromTop := c.Indent - 1
+			ctx.BaseIndent += code.Indent - indentDiffFromTop
+
+			newLen := offsetNum + code.Jmp.CurLen + code.Jmp.NextLen
+			if curlen < newLen {
+				ctx.Ptrs = append(ctx.Ptrs, make([]uintptr, newLen-curlen)...)
+			}
+			ctxptr = ctx.Ptr() + ptrOffset // assign new ctxptr
+
+			store(ctxptr, c.Idx, ptr)
+			store(ctxptr, c.End.Next.Idx, oldOffset)
+			store(ctxptr, c.End.Next.ElemIdx, uintptr(unsafe.Pointer(code.Next)))
+			storeIndent(ctxptr, c.End.Next, uintptr(oldBaseIndent))
+			code = c
+			recursiveLevel++
+		case encoder.OpRecursiveEnd:
+			recursiveLevel--
+
+			// restore ctxptr
+			restoreIndent(ctx, code, ctxptr)
+			offset := load(ctxptr, code.Idx)
+			ctx.SeenPtr = ctx.SeenPtr[:len(ctx.SeenPtr)-1]
+
+			codePtr := load(ctxptr, code.ElemIdx)
+			code = (*encoder.Opcode)(ptrToUnsafePtr(codePtr))
+			ctxptr = ctx.Ptr() + offset
+			ptrOffset = offset
+		case encoder.OpStructPtrHead:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHead:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && ((code.Flags&encoder.IndirectFlags) != 0 || code.Next.Op == encoder.OpStructEnd) {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if len(code.Key) > 0 {
+				if (code.Flags&encoder.IsTaggedKeyFlags) != 0 || code.Flags&encoder.AnonymousKeyFlags == 0 {
+					b = appendStructKey(ctx, code, b)
+				}
+			}
+			p += uintptr(code.Offset)
+			code = code.Next
+			store(ctxptr, code.Idx, p)
+		case encoder.OpStructPtrHeadOmitEmpty:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmpty:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && ((code.Flags&encoder.IndirectFlags) != 0 || code.Next.Op == encoder.OpStructEnd) {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			p += uintptr(code.Offset)
+			if p == 0 || (ptrToPtr(p) == 0 && (code.Flags&encoder.IsNextOpPtrTypeFlags) != 0) {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				code = code.Next
+				store(ctxptr, code.Idx, p)
+			}
+		case encoder.OpStructPtrHeadInt:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadInt:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = appendInt(ctx, b, p+uintptr(code.Offset), code)
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyInt:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyInt:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			u64 := ptrToUint64(p+uintptr(code.Offset), code.NumBitSize)
+			v := u64 & ((1 << code.NumBitSize) - 1)
+			if v == 0 {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				b = appendInt(ctx, b, p+uintptr(code.Offset), code)
+				b = appendComma(ctx, b)
+				code = code.Next
+			}
+		case encoder.OpStructPtrHeadIntString:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadIntString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			b = appendInt(ctx, b, p+uintptr(code.Offset), code)
+			b = append(b, '"')
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyIntString:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyIntString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			p += uintptr(code.Offset)
+			u64 := ptrToUint64(p, code.NumBitSize)
+			v := u64 & ((1 << code.NumBitSize) - 1)
+			if v == 0 {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendInt(ctx, b, p, code)
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+				code = code.Next
+			}
+		case encoder.OpStructPtrHeadIntPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadIntPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendInt(ctx, b, p, code)
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyIntPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyIntPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendInt(ctx, b, p, code)
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructPtrHeadIntPtrString: // Ptr form: resolve the loaded slot through ptrToNPtr, then share the plain head case
+			p := load(ctxptr, code.Idx)
+			if p == 0 { // zero slot: append null and a comma (nothing for anonymous heads), then resume at code.End.Next
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough // the plain head case reloads the slot that was just stored
+		case encoder.OpStructHeadIntPtrString: // struct head plus an int pointer field written as a quoted number, or null
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 { // a zero slot ends the struct early only when IndirectFlags is set
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 { // anonymous heads do not call appendStructHead
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b) // the key is always written; the value below may still be null
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum) // indirect: p becomes the field pointer resolved from p+Offset
+			}
+			if p == 0 {
+				b = appendNull(ctx, b) // null is written bare, without the surrounding quotes
+			} else {
+				b = append(b, '"')
+				b = appendInt(ctx, b, p, code)
+				b = append(b, '"')
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyIntPtrString: // Ptr form: resolve the loaded slot through ptrToNPtr, then share the plain head case
+			p := load(ctxptr, code.Idx)
+			if p == 0 { // zero slot: append null and a comma (nothing for anonymous heads), then resume at code.End.Next
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough // the plain head case reloads the slot that was just stored
+		case encoder.OpStructHeadOmitEmptyIntPtrString: // struct head plus a quoted int pointer field, left out when the pointer is zero
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 { // a zero slot ends the struct early only when IndirectFlags is set
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 { // anonymous heads do not call appendStructHead
+				b = appendStructHead(ctx, b)
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum) // indirect: p becomes the field pointer resolved from p+Offset
+			}
+			if p != 0 { // omitempty: neither key nor value is written for a zero pointer
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendInt(ctx, b, p, code)
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructPtrHeadUint: // Ptr form: with IndirectFlags set, resolve the loaded slot through ptrToNPtr; then share the plain head case
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 { // zero slot: append null and a comma (nothing for anonymous heads), then resume at code.End.Next
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum)) // the plain head case below reloads this slot
+			}
+			fallthrough
+		case encoder.OpStructHeadUint: // struct head, key, and the uint field located at p+Offset
+			p := load(ctxptr, code.Idx)
+			if p == 0 { // same null handling as above for a zero slot
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 { // anonymous heads do not call appendStructHead
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = appendUint(ctx, b, p+uintptr(code.Offset), code)
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyUint: // Ptr form: with IndirectFlags set, resolve the loaded slot through ptrToNPtr; then share the plain head case
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 { // zero slot: append null and a comma (nothing for anonymous heads), then resume at code.End.Next
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum)) // the plain head case below reloads this slot
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyUint: // struct head plus a uint field that is left out when its value is zero
+			p := load(ctxptr, code.Idx)
+			if p == 0 { // same null handling as above for a zero slot
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 { // the struct head is written before the emptiness test
+				b = appendStructHead(ctx, b)
+			}
+			u64 := ptrToUint64(p+uintptr(code.Offset), code.NumBitSize)
+			v := u64 & ((1 << code.NumBitSize) - 1) // only the low NumBitSize bits take part in the zero test
+			if v == 0 {
+				code = code.NextField // omitempty: no key or value is written; continue at NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				b = appendUint(ctx, b, p+uintptr(code.Offset), code)
+				b = appendComma(ctx, b)
+				code = code.Next
+			}
+		case encoder.OpStructPtrHeadUintString: // Ptr form: with IndirectFlags set, resolve the loaded slot through ptrToNPtr; then share the plain head case
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 { // zero slot: append null and a comma (nothing for anonymous heads), then resume at code.End.Next
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum)) // the plain head case below reloads this slot
+			}
+			fallthrough
+		case encoder.OpStructHeadUintString: // struct head, key, and the uint field at p+Offset written between double quotes
+			p := load(ctxptr, code.Idx)
+			if p == 0 { // same null handling as above for a zero slot
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 { // anonymous heads do not call appendStructHead
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			b = appendUint(ctx, b, p+uintptr(code.Offset), code)
+			b = append(b, '"')
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyUintString: // Ptr form: with IndirectFlags set, resolve the loaded slot through ptrToNPtr; then share the plain head case
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 { // zero slot: append null and a comma (nothing for anonymous heads), then resume at code.End.Next
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum)) // the plain head case below reloads this slot
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyUintString: // struct head plus a quoted uint field that is left out when its value is zero
+			p := load(ctxptr, code.Idx)
+			if p == 0 { // same null handling as above for a zero slot
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 { // the struct head is written before the emptiness test
+				b = appendStructHead(ctx, b)
+			}
+			u64 := ptrToUint64(p+uintptr(code.Offset), code.NumBitSize)
+			v := u64 & ((1 << code.NumBitSize) - 1) // only the low NumBitSize bits take part in the zero test
+			if v == 0 {
+				code = code.NextField // omitempty: no key or value is written; continue at NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendUint(ctx, b, p+uintptr(code.Offset), code)
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+				code = code.Next
+			}
+		case encoder.OpStructPtrHeadUintPtr: // Ptr form: resolve the loaded slot through ptrToNPtr, then share the plain head case
+			p := load(ctxptr, code.Idx)
+			if p == 0 { // zero slot: append null and a comma (nothing for anonymous heads), then resume at code.End.Next
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough // the plain head case reloads the slot that was just stored
+		case encoder.OpStructHeadUintPtr: // struct head, key, and a uint pointer field written as its value or null
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 { // a zero slot ends the struct early only when IndirectFlags is set
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 { // anonymous heads do not call appendStructHead
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b) // the key is always written; the value below may still be null
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum) // indirect: p becomes the field pointer resolved from p+Offset
+			}
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendUint(ctx, b, p, code)
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyUintPtr: // Ptr form: resolve the loaded slot through ptrToNPtr, then share the plain head case
+			p := load(ctxptr, code.Idx)
+			if p == 0 { // zero slot: append null and a comma (nothing for anonymous heads), then resume at code.End.Next
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough // the plain head case reloads the slot that was just stored
+		case encoder.OpStructHeadOmitEmptyUintPtr: // struct head plus a uint pointer field that is left out when the pointer is zero
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 { // a zero slot ends the struct early only when IndirectFlags is set
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 { // anonymous heads do not call appendStructHead
+				b = appendStructHead(ctx, b)
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum) // indirect: p becomes the field pointer resolved from p+Offset
+			}
+			if p != 0 { // omitempty: neither key nor value is written for a zero pointer
+				b = appendStructKey(ctx, code, b)
+				b = appendUint(ctx, b, p, code)
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructPtrHeadUintPtrString: // Ptr form: resolve the loaded slot through ptrToNPtr, then share the plain head case
+			p := load(ctxptr, code.Idx)
+			if p == 0 { // zero slot: append null and a comma (nothing for anonymous heads), then resume at code.End.Next
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough // the plain head case reloads the slot that was just stored
+		case encoder.OpStructHeadUintPtrString: // struct head plus a uint pointer field written as a quoted number, or null
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 { // a zero slot ends the struct early only when IndirectFlags is set
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 { // anonymous heads do not call appendStructHead
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b) // the key is always written; the value below may still be null
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum) // indirect: p becomes the field pointer resolved from p+Offset
+			}
+			if p == 0 {
+				b = appendNull(ctx, b) // null is written bare, without the surrounding quotes
+			} else {
+				b = append(b, '"')
+				b = appendUint(ctx, b, p, code)
+				b = append(b, '"')
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyUintPtrString: // Ptr form: resolve the loaded slot through ptrToNPtr, then share the plain head case
+			p := load(ctxptr, code.Idx)
+			if p == 0 { // zero slot: append null and a comma (nothing for anonymous heads), then resume at code.End.Next
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough // the plain head case reloads the slot that was just stored
+		case encoder.OpStructHeadOmitEmptyUintPtrString: // struct head plus a quoted uint pointer field, left out when the pointer is zero
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 { // a zero slot ends the struct early only when IndirectFlags is set
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 { // anonymous heads do not call appendStructHead
+				b = appendStructHead(ctx, b)
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum) // indirect: p becomes the field pointer resolved from p+Offset
+			}
+			if p != 0 { // omitempty: neither key nor value is written for a zero pointer
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendUint(ctx, b, p, code)
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructPtrHeadFloat32: // Ptr form: with IndirectFlags set, resolve the loaded slot through ptrToNPtr; then share the plain head case
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 { // zero slot: append null and a comma (nothing for anonymous heads), then resume at code.End.Next
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum)) // the plain head case below reloads this slot
+			}
+			fallthrough
+		case encoder.OpStructHeadFloat32: // struct head, key, and the float32 field read at p+Offset
+			p := load(ctxptr, code.Idx)
+			if p == 0 { // same null handling as above for a zero slot
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 { // anonymous heads do not call appendStructHead
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = appendFloat32(ctx, b, ptrToFloat32(p+uintptr(code.Offset)))
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyFloat32: // Ptr form: with IndirectFlags set, resolve the loaded slot through ptrToNPtr; then share the plain head case
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 { // zero slot: append null and a comma (nothing for anonymous heads), then resume at code.End.Next
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum)) // the plain head case below reloads this slot
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyFloat32: // struct head plus a float32 field that is left out when it equals 0
+			p := load(ctxptr, code.Idx)
+			if p == 0 { // same null handling as above for a zero slot
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 { // the struct head is written before the emptiness test
+				b = appendStructHead(ctx, b)
+			}
+			v := ptrToFloat32(p + uintptr(code.Offset))
+			if v == 0 {
+				code = code.NextField // omitempty: no key or value is written; continue at NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				b = appendFloat32(ctx, b, v)
+				b = appendComma(ctx, b)
+				code = code.Next
+			}
+		case encoder.OpStructPtrHeadFloat32String: // Ptr form: with IndirectFlags set, resolve the loaded slot through ptrToNPtr; then share the plain head case
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 { // zero slot: append null and a comma (nothing for anonymous heads), then resume at code.End.Next
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum)) // the plain head case below reloads this slot
+			}
+			fallthrough
+		case encoder.OpStructHeadFloat32String: // struct head, key, and the float32 field at p+Offset written between double quotes
+			p := load(ctxptr, code.Idx)
+			if p == 0 { // same null handling as above for a zero slot
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 { // anonymous heads do not call appendStructHead
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			b = appendFloat32(ctx, b, ptrToFloat32(p+uintptr(code.Offset)))
+			b = append(b, '"')
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyFloat32String: // Ptr form: with IndirectFlags set, resolve the loaded slot through ptrToNPtr; then share the plain head case
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 { // zero slot: append null and a comma (nothing for anonymous heads), then resume at code.End.Next
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum)) // the plain head case below reloads this slot
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyFloat32String: // struct head plus a quoted float32 field that is left out when it equals 0
+			p := load(ctxptr, code.Idx)
+			if p == 0 { // same null handling as above for a zero slot
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 { // the struct head is written before the emptiness test
+				b = appendStructHead(ctx, b)
+			}
+			v := ptrToFloat32(p + uintptr(code.Offset))
+			if v == 0 {
+				code = code.NextField // omitempty: no key or value is written; continue at NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendFloat32(ctx, b, v)
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+				code = code.Next
+			}
+		case encoder.OpStructPtrHeadFloat32Ptr: // Ptr form: resolve the loaded slot through ptrToNPtr, then share the plain head case
+			p := load(ctxptr, code.Idx)
+			if p == 0 { // zero slot: append null and a comma (nothing for anonymous heads), then resume at code.End.Next
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough // the plain head case reloads the slot that was just stored
+		case encoder.OpStructHeadFloat32Ptr: // struct head, key, and a float32 pointer field written as its value or null
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 { // a zero slot ends the struct early only when IndirectFlags is set
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 { // anonymous heads do not call appendStructHead
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b) // the key is always written; the value below may still be null
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum) // indirect: p becomes the field pointer resolved from p+Offset
+			}
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendFloat32(ctx, b, ptrToFloat32(p))
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyFloat32Ptr: // Ptr form: resolve the loaded slot through ptrToNPtr, then share the plain head case
+			p := load(ctxptr, code.Idx)
+			if p == 0 { // zero slot: append null and a comma (nothing for anonymous heads), then resume at code.End.Next
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough // the plain head case reloads the slot that was just stored
+		case encoder.OpStructHeadOmitEmptyFloat32Ptr: // struct head plus a float32 pointer field that is left out when the pointer is zero
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 { // a zero slot ends the struct early only when IndirectFlags is set
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 { // anonymous heads do not call appendStructHead
+				b = appendStructHead(ctx, b)
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum) // indirect: p becomes the field pointer resolved from p+Offset
+			}
+			if p != 0 { // omitempty: neither key nor value is written for a zero pointer
+				b = appendStructKey(ctx, code, b)
+				b = appendFloat32(ctx, b, ptrToFloat32(p))
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructPtrHeadFloat32PtrString: // Ptr form: resolve the loaded slot through ptrToNPtr, then share the plain head case
+			p := load(ctxptr, code.Idx)
+			if p == 0 { // zero slot: append null and a comma (nothing for anonymous heads), then resume at code.End.Next
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough // the plain head case reloads the slot that was just stored
+		case encoder.OpStructHeadFloat32PtrString: // struct head plus a float32 pointer field written as a quoted number, or null
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 { // a zero slot ends the struct early only when IndirectFlags is set
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 { // anonymous heads do not call appendStructHead
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b) // the key is always written; the value below may still be null
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum) // indirect: p becomes the field pointer resolved from p+Offset
+			}
+			if p == 0 {
+				b = appendNull(ctx, b) // null is written bare, without the surrounding quotes
+			} else {
+				b = append(b, '"')
+				b = appendFloat32(ctx, b, ptrToFloat32(p))
+				b = append(b, '"')
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyFloat32PtrString: // Ptr form: resolve the loaded slot through ptrToNPtr, then share the plain head case
+			p := load(ctxptr, code.Idx)
+			if p == 0 { // zero slot: append null and a comma (nothing for anonymous heads), then resume at code.End.Next
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough // the plain head case reloads the slot that was just stored
+		case encoder.OpStructHeadOmitEmptyFloat32PtrString: // struct head plus a quoted float32 pointer field, left out when the pointer is zero
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 { // a zero slot ends the struct early only when IndirectFlags is set
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 { // anonymous heads do not call appendStructHead
+				b = appendStructHead(ctx, b)
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum) // indirect: p becomes the field pointer resolved from p+Offset
+			}
+			if p != 0 { // omitempty: neither key nor value is written for a zero pointer
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendFloat32(ctx, b, ptrToFloat32(p))
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructPtrHeadFloat64: // Ptr form: with IndirectFlags set, resolve the loaded slot through ptrToNPtr; then share the plain head case
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 { // zero slot: append null and a comma (nothing for anonymous heads), then resume at code.End.Next
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum)) // the plain head case below reloads this slot
+			}
+			fallthrough
+		case encoder.OpStructHeadFloat64: // struct head, key, and the float64 field read at p+Offset; Inf/NaN abort with an error
+			p := load(ctxptr, code.Idx)
+			if p == 0 { // same null handling as above for a zero slot
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			v := ptrToFloat64(p + uintptr(code.Offset))
+			if math.IsInf(v, 0) || math.IsNaN(v) { // checked before anything is appended for this struct
+				return nil, errUnsupportedFloat(v)
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 { // anonymous heads do not call appendStructHead
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = appendFloat64(ctx, b, v)
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyFloat64: // Ptr form: with IndirectFlags set, resolve the loaded slot through ptrToNPtr; then share the plain head case
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 { // zero slot: append null and a comma (nothing for anonymous heads), then resume at code.End.Next
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum)) // the plain head case below reloads this slot
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyFloat64: // struct head plus a float64 field that is left out when it equals 0
+			p := load(ctxptr, code.Idx)
+			if p == 0 { // same null handling as above for a zero slot
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 { // the struct head is written before the emptiness test
+				b = appendStructHead(ctx, b)
+			}
+			v := ptrToFloat64(p + uintptr(code.Offset))
+			if v == 0 {
+				code = code.NextField // omitempty: no key or value is written; continue at NextField
+			} else {
+				if math.IsInf(v, 0) || math.IsNaN(v) { // infinities of either sign and NaN are rejected
+					return nil, errUnsupportedFloat(v)
+				}
+				b = appendStructKey(ctx, code, b)
+				b = appendFloat64(ctx, b, v)
+				b = appendComma(ctx, b)
+				code = code.Next
+			}
+		case encoder.OpStructPtrHeadFloat64String: // Ptr form: with IndirectFlags set, resolve the loaded slot through ptrToNPtr; then share the plain head case
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 { // zero slot: append null and a comma (nothing for anonymous heads), then resume at code.End.Next
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum)) // the plain head case below reloads this slot
+			}
+			fallthrough
+		case encoder.OpStructHeadFloat64String: // struct head, key, and the float64 field at p+Offset written between double quotes
+			p := load(ctxptr, code.Idx)
+			if p == 0 { // same null handling as above for a zero slot
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 { // anonymous heads do not call appendStructHead
+				b = appendStructHead(ctx, b)
+			}
+			v := ptrToFloat64(p + uintptr(code.Offset))
+			if math.IsInf(v, 0) || math.IsNaN(v) { // infinities of either sign and NaN are rejected
+				return nil, errUnsupportedFloat(v)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			b = appendFloat64(ctx, b, v)
+			b = append(b, '"')
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyFloat64String: // Ptr form: with IndirectFlags set, resolve the loaded slot through ptrToNPtr; then share the plain head case
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 { // zero slot: append null and a comma (nothing for anonymous heads), then resume at code.End.Next
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum)) // the plain head case below reloads this slot
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyFloat64String: // struct head plus a quoted float64 field that is left out when it equals 0
+			p := load(ctxptr, code.Idx)
+			if p == 0 { // same null handling as above for a zero slot
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 { // the struct head is written before the emptiness test
+				b = appendStructHead(ctx, b)
+			}
+			v := ptrToFloat64(p + uintptr(code.Offset))
+			if v == 0 {
+				code = code.NextField // omitempty: no key or value is written; continue at NextField
+			} else {
+				if math.IsInf(v, 0) || math.IsNaN(v) { // infinities of either sign and NaN are rejected
+					return nil, errUnsupportedFloat(v)
+				}
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendFloat64(ctx, b, v)
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+				code = code.Next
+			}
+		case encoder.OpStructPtrHeadFloat64Ptr: // Ptr form: resolve the loaded slot through ptrToNPtr, then share the plain head case
+			p := load(ctxptr, code.Idx)
+			if p == 0 { // zero slot: append null and a comma (nothing for anonymous heads), then resume at code.End.Next
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough // the plain head case reloads the slot that was just stored
+		case encoder.OpStructHeadFloat64Ptr: // struct head, key, and a float64 pointer field written as its value or null
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 { // a zero slot ends the struct early only when IndirectFlags is set
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 { // anonymous heads do not call appendStructHead
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b) // the key is always written; the value below may still be null
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum) // indirect: p becomes the field pointer resolved from p+Offset
+			}
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				v := ptrToFloat64(p)
+				if math.IsInf(v, 0) || math.IsNaN(v) { // infinities of either sign and NaN are rejected
+					return nil, errUnsupportedFloat(v)
+				}
+				b = appendFloat64(ctx, b, v)
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyFloat64Ptr: // Ptr form: resolve the loaded slot through ptrToNPtr, then share the plain head case
+			p := load(ctxptr, code.Idx)
+			if p == 0 { // zero slot: append null and a comma (nothing for anonymous heads), then resume at code.End.Next
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough // the plain head case reloads the slot that was just stored
+		case encoder.OpStructHeadOmitEmptyFloat64Ptr: // struct head plus a float64 pointer field that is left out when the pointer is zero
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 { // a zero slot ends the struct early only when IndirectFlags is set
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 { // anonymous heads do not call appendStructHead
+				b = appendStructHead(ctx, b)
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum) // indirect: p becomes the field pointer resolved from p+Offset
+			}
+			if p != 0 { // omitempty: neither key nor value is written for a zero pointer
+				b = appendStructKey(ctx, code, b)
+				v := ptrToFloat64(p)
+				if math.IsInf(v, 0) || math.IsNaN(v) { // rejected after the key has already been appended to b
+					return nil, errUnsupportedFloat(v)
+				}
+				b = appendFloat64(ctx, b, v)
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructPtrHeadFloat64PtrString: // Ptr form: resolve the loaded slot through ptrToNPtr, then share the plain head case
+			p := load(ctxptr, code.Idx)
+			if p == 0 { // zero slot: append null and a comma (nothing for anonymous heads), then resume at code.End.Next
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough // the plain head case reloads the slot that was just stored
+		case encoder.OpStructHeadFloat64PtrString: // struct head plus a float64 pointer field written as a quoted number, or null
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 { // a zero slot ends the struct early only when IndirectFlags is set
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 { // anonymous heads do not call appendStructHead
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b) // the key is always written; the value below may still be null
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum) // indirect: p becomes the field pointer resolved from p+Offset
+			}
+			if p == 0 {
+				b = appendNull(ctx, b) // null is written bare, without the surrounding quotes
+			} else {
+				b = append(b, '"')
+				v := ptrToFloat64(p)
+				if math.IsInf(v, 0) || math.IsNaN(v) { // rejected after the opening quote has already been appended to b
+					return nil, errUnsupportedFloat(v)
+				}
+				b = appendFloat64(ctx, b, v)
+				b = append(b, '"')
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyFloat64PtrString: // Ptr form: resolve the loaded slot through ptrToNPtr, then share the plain head case
+			p := load(ctxptr, code.Idx)
+			if p == 0 { // zero slot: append null and a comma (nothing for anonymous heads), then resume at code.End.Next
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough // the plain head case reloads the slot that was just stored
+		case encoder.OpStructHeadOmitEmptyFloat64PtrString: // struct head plus a quoted float64 pointer field, left out when the pointer is zero
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 { // a zero slot ends the struct early only when IndirectFlags is set
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 { // anonymous heads do not call appendStructHead
+				b = appendStructHead(ctx, b)
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum) // indirect: p becomes the field pointer resolved from p+Offset
+			}
+			if p != 0 { // omitempty: neither key nor value is written for a zero pointer
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				v := ptrToFloat64(p)
+				if math.IsInf(v, 0) || math.IsNaN(v) { // rejected after the key and opening quote have been appended to b
+					return nil, errUnsupportedFloat(v)
+				}
+				b = appendFloat64(ctx, b, v)
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructPtrHeadString: // Ptr form: with IndirectFlags set, resolve the loaded slot through ptrToNPtr; then share the plain head case
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 { // zero slot: append null and a comma (nothing for anonymous heads), then resume at code.End.Next
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum)) // the plain head case below reloads this slot
+			}
+			fallthrough
+		case encoder.OpStructHeadString: // struct head, key, and the string field read at p+Offset
+			p := load(ctxptr, code.Idx)
+			if p == 0 { // zero slot: null then comma, here via two separate calls instead of appendNullComma
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNull(ctx, b)
+					b = appendComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 { // anonymous heads do not call appendStructHead
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = appendString(ctx, b, ptrToString(p+uintptr(code.Offset)))
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyString: // Ptr form: with IndirectFlags set, resolve the loaded slot through ptrToNPtr; then share the plain head case
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 { // zero slot: append null and a comma (nothing for anonymous heads), then resume at code.End.Next
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum)) // the plain head case below reloads this slot
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyString: // struct head plus a string field that is left out when it is ""
+			p := load(ctxptr, code.Idx)
+			if p == 0 { // same null handling as above for a zero slot
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 { // the struct head is written before the emptiness test
+				b = appendStructHead(ctx, b)
+			}
+			v := ptrToString(p + uintptr(code.Offset))
+			if v == "" {
+				code = code.NextField // omitempty: no key or value is written; continue at NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				b = appendString(ctx, b, v)
+				b = appendComma(ctx, b)
+				code = code.Next
+			}
+		case encoder.OpStructPtrHeadStringString: // Ptr form: with IndirectFlags set, resolve the loaded slot through ptrToNPtr; then share the plain head case
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 { // zero slot: append null and a comma (nothing for anonymous heads), then resume at code.End.Next
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum)) // the plain head case below reloads this slot
+			}
+			fallthrough
+		case encoder.OpStructHeadStringString: // struct head, key, and the string field at p+Offset passed through appendString twice
+			p := load(ctxptr, code.Idx)
+			if p == 0 { // same null handling as above for a zero slot
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 { // anonymous heads do not call appendStructHead
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = appendString(ctx, b, string(appendString(ctx, []byte{}, ptrToString(p+uintptr(code.Offset))))) // inner call encodes into a scratch buffer; the outer call encodes that result again
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyStringString: // Ptr form: with IndirectFlags set, resolve the loaded slot through ptrToNPtr; then share the plain head case
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 { // zero slot: append null and a comma (nothing for anonymous heads), then resume at code.End.Next
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum)) // the plain head case below reloads this slot
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyStringString: // struct head plus a twice-encoded string field that is left out when it is ""
+			p := load(ctxptr, code.Idx)
+			if p == 0 { // same null handling as above for a zero slot
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 { // the struct head is written before the emptiness test
+				b = appendStructHead(ctx, b)
+			}
+			v := ptrToString(p + uintptr(code.Offset))
+			if v == "" {
+				code = code.NextField // omitempty: no key or value is written; continue at NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				b = appendString(ctx, b, string(appendString(ctx, []byte{}, v))) // inner call encodes into a scratch buffer; the outer call encodes that result again
+				b = appendComma(ctx, b)
+				code = code.Next
+			}
+		case encoder.OpStructPtrHeadStringPtr: // Ptr form: resolve the loaded slot through ptrToNPtr, then share the plain head case
+			p := load(ctxptr, code.Idx)
+			if p == 0 { // zero slot: append null and a comma (nothing for anonymous heads), then resume at code.End.Next
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough // the plain head case reloads the slot that was just stored
+		case encoder.OpStructHeadStringPtr: // struct head, key, and a string pointer field written as its value or null
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 { // a zero slot ends the struct early only when IndirectFlags is set
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 { // anonymous heads do not call appendStructHead
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b) // the key is always written; the value below may still be null
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum) // indirect: p becomes the field pointer resolved from p+Offset
+			}
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendString(ctx, b, ptrToString(p))
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyStringPtr: // Ptr form: resolve the loaded slot through ptrToNPtr, then share the plain head case
+			p := load(ctxptr, code.Idx)
+			if p == 0 { // zero slot: append null and a comma (nothing for anonymous heads), then resume at code.End.Next
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough // the plain head case reloads the slot that was just stored
+		case encoder.OpStructHeadOmitEmptyStringPtr: // struct head plus a string pointer field that is left out when the pointer is zero
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 { // a zero slot ends the struct early only when IndirectFlags is set
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 { // anonymous heads do not call appendStructHead
+				b = appendStructHead(ctx, b)
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum) // indirect: p becomes the field pointer resolved from p+Offset
+			}
+			if p != 0 { // only the pointer is tested here; the pointed-to string is not compared with ""
+				b = appendStructKey(ctx, code, b)
+				b = appendString(ctx, b, ptrToString(p))
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructPtrHeadStringPtrString: // Ptr form: resolve the loaded slot through ptrToNPtr, then share the plain head case
+			p := load(ctxptr, code.Idx)
+			if p == 0 { // zero slot: append null and a comma (nothing for anonymous heads), then resume at code.End.Next
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough // the plain head case reloads the slot that was just stored
+		case encoder.OpStructHeadStringPtrString: // struct head plus a string pointer field written twice-encoded, or null
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 { // a zero slot ends the struct early only when IndirectFlags is set
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 { // anonymous heads do not call appendStructHead
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b) // the key is always written; the value below may still be null
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum) // indirect: p becomes the field pointer resolved from p+Offset
+			}
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendString(ctx, b, string(appendString(ctx, []byte{}, ptrToString(p)))) // inner call encodes into a scratch buffer; the outer call encodes that result again
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyStringPtrString: // Ptr form: resolve the loaded slot through ptrToNPtr, then share the plain head case
+			p := load(ctxptr, code.Idx)
+			if p == 0 { // zero slot: append null and a comma (nothing for anonymous heads), then resume at code.End.Next
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough // the plain head case reloads the slot that was just stored
+		case encoder.OpStructHeadOmitEmptyStringPtrString: // struct head plus a twice-encoded string pointer field, left out when the pointer is zero
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 { // a zero slot ends the struct early only when IndirectFlags is set
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 { // anonymous heads do not call appendStructHead
+				b = appendStructHead(ctx, b)
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum) // indirect: p becomes the field pointer resolved from p+Offset
+			}
+			if p != 0 { // only the pointer is tested here; the pointed-to string is not compared with ""
+				b = appendStructKey(ctx, code, b)
+				b = appendString(ctx, b, string(appendString(ctx, []byte{}, ptrToString(p)))) // inner call encodes into a scratch buffer; the outer call encodes that result again
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructPtrHeadBool:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadBool:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = appendBool(ctx, b, ptrToBool(p+uintptr(code.Offset)))
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyBool:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyBool:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			v := ptrToBool(p + uintptr(code.Offset))
+			if v {
+				b = appendStructKey(ctx, code, b)
+				b = appendBool(ctx, b, v)
+				b = appendComma(ctx, b)
+				code = code.Next
+			} else {
+				code = code.NextField
+			}
+		case encoder.OpStructPtrHeadBoolString:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadBoolString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			b = appendBool(ctx, b, ptrToBool(p+uintptr(code.Offset)))
+			b = append(b, '"')
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyBoolString:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyBoolString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			v := ptrToBool(p + uintptr(code.Offset))
+			if v {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendBool(ctx, b, v)
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+				code = code.Next
+			} else {
+				code = code.NextField
+			}
+		case encoder.OpStructPtrHeadBoolPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadBoolPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendBool(ctx, b, ptrToBool(p))
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyBoolPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyBoolPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendBool(ctx, b, ptrToBool(p))
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructPtrHeadBoolPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadBoolPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = append(b, '"')
+				b = appendBool(ctx, b, ptrToBool(p))
+				b = append(b, '"')
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyBoolPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyBoolPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendBool(ctx, b, ptrToBool(p))
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructPtrHeadBytes:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadBytes:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = appendByteSlice(ctx, b, ptrToBytes(p+uintptr(code.Offset)))
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyBytes:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyBytes:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			v := ptrToBytes(p + uintptr(code.Offset))
+			if len(v) == 0 {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				b = appendByteSlice(ctx, b, v)
+				b = appendComma(ctx, b)
+				code = code.Next
+			}
+		case encoder.OpStructPtrHeadBytesPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadBytesPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendByteSlice(ctx, b, ptrToBytes(p))
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyBytesPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyBytesPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendByteSlice(ctx, b, ptrToBytes(p))
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructPtrHeadNumber:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadNumber:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			bb, err := appendNumber(ctx, b, ptrToNumber(p+uintptr(code.Offset)))
+			if err != nil {
+				return nil, err
+			}
+			b = appendComma(ctx, bb)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyNumber:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyNumber:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			v := ptrToNumber(p + uintptr(code.Offset))
+			if v == "" {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				bb, err := appendNumber(ctx, b, v)
+				if err != nil {
+					return nil, err
+				}
+				b = appendComma(ctx, bb)
+				code = code.Next
+			}
+		case encoder.OpStructPtrHeadNumberString:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadNumberString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			bb, err := appendNumber(ctx, b, ptrToNumber(p+uintptr(code.Offset)))
+			if err != nil {
+				return nil, err
+			}
+			b = append(bb, '"')
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyNumberString:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyNumberString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			v := ptrToNumber(p + uintptr(code.Offset))
+			if v == "" {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				bb, err := appendNumber(ctx, b, v)
+				if err != nil {
+					return nil, err
+				}
+				b = append(bb, '"')
+				b = appendComma(ctx, b)
+				code = code.Next
+			}
+		case encoder.OpStructPtrHeadNumberPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadNumberPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				bb, err := appendNumber(ctx, b, ptrToNumber(p))
+				if err != nil {
+					return nil, err
+				}
+				b = bb
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyNumberPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyNumberPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				bb, err := appendNumber(ctx, b, ptrToNumber(p))
+				if err != nil {
+					return nil, err
+				}
+				b = appendComma(ctx, bb)
+			}
+			code = code.Next
+		case encoder.OpStructPtrHeadNumberPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadNumberPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = append(b, '"')
+				bb, err := appendNumber(ctx, b, ptrToNumber(p))
+				if err != nil {
+					return nil, err
+				}
+				b = append(bb, '"')
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyNumberPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyNumberPtrString:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				bb, err := appendNumber(ctx, b, ptrToNumber(p))
+				if err != nil {
+					return nil, err
+				}
+				b = append(bb, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructPtrHeadArray, encoder.OpStructPtrHeadSlice:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadArray, encoder.OpStructHeadSlice:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			p += uintptr(code.Offset)
+			code = code.Next
+			store(ctxptr, code.Idx, p)
+		case encoder.OpStructPtrHeadOmitEmptyArray:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyArray:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			p += uintptr(code.Offset)
+			b = appendStructKey(ctx, code, b)
+			code = code.Next
+			store(ctxptr, code.Idx, p)
+		case encoder.OpStructPtrHeadOmitEmptySlice:
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p := load(ctxptr, code.Idx)
+				if p == 0 {
+					if code.Flags&encoder.AnonymousHeadFlags == 0 {
+						b = appendNullComma(ctx, b)
+					}
+					code = code.End.Next
+					break
+				}
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptySlice:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			p += uintptr(code.Offset)
+			slice := ptrToSlice(p)
+			if slice.Len == 0 {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				code = code.Next
+				store(ctxptr, code.Idx, p)
+			}
+		case encoder.OpStructPtrHeadArrayPtr, encoder.OpStructPtrHeadSlicePtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadArrayPtr, encoder.OpStructHeadSlicePtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.NextField
+			} else {
+				code = code.Next
+				store(ctxptr, code.Idx, p)
+			}
+		case encoder.OpStructPtrHeadOmitEmptyArrayPtr, encoder.OpStructPtrHeadOmitEmptySlicePtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyArrayPtr, encoder.OpStructHeadOmitEmptySlicePtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				code = code.Next
+				store(ctxptr, code.Idx, p)
+			}
+		case encoder.OpStructPtrHeadMap:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadMap:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if p != 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				p = ptrToPtr(p + uintptr(code.Offset))
+			}
+			code = code.Next
+			store(ctxptr, code.Idx, p)
+		case encoder.OpStructPtrHeadOmitEmptyMap:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyMap:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if p != 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				p = ptrToPtr(p + uintptr(code.Offset))
+			}
+			if maplen(ptrToUnsafePtr(p)) == 0 {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				code = code.Next
+				store(ctxptr, code.Idx, p)
+			}
+		case encoder.OpStructPtrHeadMapPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadMapPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.NextField
+				break
+			}
+			p = ptrToPtr(p + uintptr(code.Offset))
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.NextField
+			} else {
+				if (code.Flags & encoder.IndirectFlags) != 0 {
+					p = ptrToNPtr(p, code.PtrNum)
+				}
+				code = code.Next
+				store(ctxptr, code.Idx, p)
+			}
+		case encoder.OpStructPtrHeadOmitEmptyMapPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyMapPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if p == 0 {
+				code = code.NextField
+				break
+			}
+			p = ptrToPtr(p + uintptr(code.Offset))
+			if p == 0 {
+				code = code.NextField
+			} else {
+				if (code.Flags & encoder.IndirectFlags) != 0 {
+					p = ptrToNPtr(p, code.PtrNum)
+				}
+				b = appendStructKey(ctx, code, b)
+				code = code.Next
+				store(ctxptr, code.Idx, p)
+			}
+		case encoder.OpStructPtrHeadMarshalJSON:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadMarshalJSON:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			p += uintptr(code.Offset)
+			if (code.Flags & encoder.IsNilableTypeFlags) != 0 {
+				if (code.Flags&encoder.IndirectFlags) != 0 || code.Op == encoder.OpStructPtrHeadMarshalJSON {
+					p = ptrToPtr(p)
+				}
+			}
+			if p == 0 && (code.Flags&encoder.NilCheckFlags) != 0 {
+				b = appendNull(ctx, b)
+			} else {
+				bb, err := appendMarshalJSON(ctx, code, b, ptrToInterface(code, p))
+				if err != nil {
+					return nil, err
+				}
+				b = bb
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyMarshalJSON:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyMarshalJSON:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			p += uintptr(code.Offset)
+			if (code.Flags & encoder.IsNilableTypeFlags) != 0 {
+				if (code.Flags&encoder.IndirectFlags) != 0 || code.Op == encoder.OpStructPtrHeadOmitEmptyMarshalJSON {
+					p = ptrToPtr(p)
+				}
+			}
+			iface := ptrToInterface(code, p)
+			if (code.Flags&encoder.NilCheckFlags) != 0 && encoder.IsNilForMarshaler(iface) {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				bb, err := appendMarshalJSON(ctx, code, b, iface)
+				if err != nil {
+					return nil, err
+				}
+				b = bb
+				b = appendComma(ctx, b)
+				code = code.Next
+			}
+		case encoder.OpStructPtrHeadMarshalJSONPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadMarshalJSONPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				bb, err := appendMarshalJSON(ctx, code, b, ptrToInterface(code, p))
+				if err != nil {
+					return nil, err
+				}
+				b = bb
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyMarshalJSONPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyMarshalJSONPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if p == 0 {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				bb, err := appendMarshalJSON(ctx, code, b, ptrToInterface(code, p))
+				if err != nil {
+					return nil, err
+				}
+				b = bb
+				b = appendComma(ctx, b)
+				code = code.Next
+			}
+		case encoder.OpStructPtrHeadMarshalText:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadMarshalText:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			p += uintptr(code.Offset)
+			if (code.Flags & encoder.IsNilableTypeFlags) != 0 {
+				if (code.Flags&encoder.IndirectFlags) != 0 || code.Op == encoder.OpStructPtrHeadMarshalText {
+					p = ptrToPtr(p)
+				}
+			}
+			if p == 0 && (code.Flags&encoder.NilCheckFlags) != 0 {
+				b = appendNull(ctx, b)
+			} else {
+				bb, err := appendMarshalText(ctx, code, b, ptrToInterface(code, p))
+				if err != nil {
+					return nil, err
+				}
+				b = bb
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyMarshalText:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			}
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyMarshalText:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			p += uintptr(code.Offset)
+			if (code.Flags & encoder.IsNilableTypeFlags) != 0 {
+				if (code.Flags&encoder.IndirectFlags) != 0 || code.Op == encoder.OpStructPtrHeadOmitEmptyMarshalText {
+					p = ptrToPtr(p)
+				}
+			}
+			if p == 0 && (code.Flags&encoder.NilCheckFlags) != 0 {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				bb, err := appendMarshalText(ctx, code, b, ptrToInterface(code, p))
+				if err != nil {
+					return nil, err
+				}
+				b = bb
+				b = appendComma(ctx, b)
+				code = code.Next
+			}
+		case encoder.OpStructPtrHeadMarshalTextPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadMarshalTextPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			b = appendStructKey(ctx, code, b)
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				bb, err := appendMarshalText(ctx, code, b, ptrToInterface(code, p))
+				if err != nil {
+					return nil, err
+				}
+				b = bb
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructPtrHeadOmitEmptyMarshalTextPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			store(ctxptr, code.Idx, ptrToNPtr(p, code.PtrNum))
+			fallthrough
+		case encoder.OpStructHeadOmitEmptyMarshalTextPtr:
+			p := load(ctxptr, code.Idx)
+			if p == 0 && (code.Flags&encoder.IndirectFlags) != 0 {
+				if code.Flags&encoder.AnonymousHeadFlags == 0 {
+					b = appendNullComma(ctx, b)
+				}
+				code = code.End.Next
+				break
+			}
+			if (code.Flags & encoder.IndirectFlags) != 0 {
+				p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			}
+			if code.Flags&encoder.AnonymousHeadFlags == 0 {
+				b = appendStructHead(ctx, b)
+			}
+			if p == 0 {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				bb, err := appendMarshalText(ctx, code, b, ptrToInterface(code, p))
+				if err != nil {
+					return nil, err
+				}
+				b = bb
+				b = appendComma(ctx, b)
+				code = code.Next
+			}
+		case encoder.OpStructField:
+			if code.Flags&encoder.IsTaggedKeyFlags != 0 || code.Flags&encoder.AnonymousKeyFlags == 0 {
+				b = appendStructKey(ctx, code, b)
+			}
+			p := load(ctxptr, code.Idx) + uintptr(code.Offset)
+			code = code.Next
+			store(ctxptr, code.Idx, p)
+		case encoder.OpStructFieldOmitEmpty:
+			p := load(ctxptr, code.Idx)
+			p += uintptr(code.Offset)
+			if ptrToPtr(p) == 0 && (code.Flags&encoder.IsNextOpPtrTypeFlags) != 0 {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				code = code.Next
+				store(ctxptr, code.Idx, p)
+			}
+		case encoder.OpStructFieldInt:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = appendInt(ctx, b, p+uintptr(code.Offset), code)
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyInt:
+			p := load(ctxptr, code.Idx)
+			u64 := ptrToUint64(p+uintptr(code.Offset), code.NumBitSize)
+			v := u64 & ((1 << code.NumBitSize) - 1)
+			if v != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendInt(ctx, b, p+uintptr(code.Offset), code)
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldIntString:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			b = appendInt(ctx, b, p+uintptr(code.Offset), code)
+			b = append(b, '"')
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyIntString:
+			p := load(ctxptr, code.Idx)
+			u64 := ptrToUint64(p+uintptr(code.Offset), code.NumBitSize)
+			v := u64 & ((1 << code.NumBitSize) - 1)
+			if v != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendInt(ctx, b, p+uintptr(code.Offset), code)
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldIntPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			b = appendStructKey(ctx, code, b)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendInt(ctx, b, p, code)
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyIntPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendInt(ctx, b, p, code)
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldIntPtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			b = appendStructKey(ctx, code, b)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = append(b, '"')
+				b = appendInt(ctx, b, p, code)
+				b = append(b, '"')
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyIntPtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendInt(ctx, b, p, code)
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldUint:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = appendUint(ctx, b, p+uintptr(code.Offset), code)
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyUint:
+			p := load(ctxptr, code.Idx)
+			u64 := ptrToUint64(p+uintptr(code.Offset), code.NumBitSize)
+			v := u64 & ((1 << code.NumBitSize) - 1)
+			if v != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendUint(ctx, b, p+uintptr(code.Offset), code)
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldUintString:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			b = appendUint(ctx, b, p+uintptr(code.Offset), code)
+			b = append(b, '"')
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyUintString:
+			p := load(ctxptr, code.Idx)
+			u64 := ptrToUint64(p+uintptr(code.Offset), code.NumBitSize)
+			v := u64 & ((1 << code.NumBitSize) - 1)
+			if v != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendUint(ctx, b, p+uintptr(code.Offset), code)
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldUintPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			b = appendStructKey(ctx, code, b)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendUint(ctx, b, p, code)
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyUintPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendUint(ctx, b, p, code)
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldUintPtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			b = appendStructKey(ctx, code, b)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = append(b, '"')
+				b = appendUint(ctx, b, p, code)
+				b = append(b, '"')
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyUintPtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendUint(ctx, b, p, code)
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldFloat32:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = appendFloat32(ctx, b, ptrToFloat32(p+uintptr(code.Offset)))
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyFloat32:
+			p := load(ctxptr, code.Idx)
+			v := ptrToFloat32(p + uintptr(code.Offset))
+			if v != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendFloat32(ctx, b, v)
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldFloat32String:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			b = appendFloat32(ctx, b, ptrToFloat32(p+uintptr(code.Offset)))
+			b = append(b, '"')
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyFloat32String:
+			p := load(ctxptr, code.Idx)
+			v := ptrToFloat32(p + uintptr(code.Offset))
+			if v != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendFloat32(ctx, b, v)
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldFloat32Ptr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			b = appendStructKey(ctx, code, b)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendFloat32(ctx, b, ptrToFloat32(p))
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyFloat32Ptr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendFloat32(ctx, b, ptrToFloat32(p))
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldFloat32PtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			b = appendStructKey(ctx, code, b)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = append(b, '"')
+				b = appendFloat32(ctx, b, ptrToFloat32(p))
+				b = append(b, '"')
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyFloat32PtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendFloat32(ctx, b, ptrToFloat32(p))
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldFloat64:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			v := ptrToFloat64(p + uintptr(code.Offset))
+			if math.IsInf(v, 0) || math.IsNaN(v) {
+				return nil, errUnsupportedFloat(v)
+			}
+			b = appendFloat64(ctx, b, v)
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyFloat64:
+			p := load(ctxptr, code.Idx)
+			v := ptrToFloat64(p + uintptr(code.Offset))
+			if v != 0 {
+				if math.IsInf(v, 0) || math.IsNaN(v) {
+					return nil, errUnsupportedFloat(v)
+				}
+				b = appendStructKey(ctx, code, b)
+				b = appendFloat64(ctx, b, v)
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldFloat64String:
+			p := load(ctxptr, code.Idx)
+			v := ptrToFloat64(p + uintptr(code.Offset))
+			if math.IsInf(v, 0) || math.IsNaN(v) {
+				return nil, errUnsupportedFloat(v)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			b = appendFloat64(ctx, b, v)
+			b = append(b, '"')
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyFloat64String:
+			p := load(ctxptr, code.Idx)
+			v := ptrToFloat64(p + uintptr(code.Offset))
+			if v != 0 {
+				if math.IsInf(v, 0) || math.IsNaN(v) {
+					return nil, errUnsupportedFloat(v)
+				}
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendFloat64(ctx, b, v)
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldFloat64Ptr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			b = appendStructKey(ctx, code, b)
+			if p == 0 {
+				b = appendNullComma(ctx, b)
+				code = code.Next
+				break
+			}
+			v := ptrToFloat64(p)
+			if math.IsInf(v, 0) || math.IsNaN(v) {
+				return nil, errUnsupportedFloat(v)
+			}
+			b = appendFloat64(ctx, b, v)
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyFloat64Ptr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				v := ptrToFloat64(p)
+				if math.IsInf(v, 0) || math.IsNaN(v) {
+					return nil, errUnsupportedFloat(v)
+				}
+				b = appendFloat64(ctx, b, v)
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldFloat64PtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			b = appendStructKey(ctx, code, b)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				v := ptrToFloat64(p)
+				if math.IsInf(v, 0) || math.IsNaN(v) {
+					return nil, errUnsupportedFloat(v)
+				}
+				b = append(b, '"')
+				b = appendFloat64(ctx, b, v)
+				b = append(b, '"')
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyFloat64PtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				v := ptrToFloat64(p)
+				if math.IsInf(v, 0) || math.IsNaN(v) {
+					return nil, errUnsupportedFloat(v)
+				}
+				b = appendFloat64(ctx, b, v)
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldString:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = appendString(ctx, b, ptrToString(p+uintptr(code.Offset)))
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyString:
+			p := load(ctxptr, code.Idx)
+			v := ptrToString(p + uintptr(code.Offset))
+			if v != "" {
+				b = appendStructKey(ctx, code, b)
+				b = appendString(ctx, b, v)
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldStringString:
+			p := load(ctxptr, code.Idx)
+			s := ptrToString(p + uintptr(code.Offset))
+			b = appendStructKey(ctx, code, b)
+			b = appendString(ctx, b, string(appendString(ctx, []byte{}, s)))
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyStringString:
+			p := load(ctxptr, code.Idx)
+			v := ptrToString(p + uintptr(code.Offset))
+			if v != "" {
+				b = appendStructKey(ctx, code, b)
+				b = appendString(ctx, b, string(appendString(ctx, []byte{}, v)))
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldStringPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			b = appendStructKey(ctx, code, b)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendString(ctx, b, ptrToString(p))
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyStringPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendString(ctx, b, ptrToString(p))
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldStringPtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			b = appendStructKey(ctx, code, b)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendString(ctx, b, string(appendString(ctx, []byte{}, ptrToString(p))))
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyStringPtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendString(ctx, b, string(appendString(ctx, []byte{}, ptrToString(p))))
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldBool:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = appendBool(ctx, b, ptrToBool(p+uintptr(code.Offset)))
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyBool:
+			p := load(ctxptr, code.Idx)
+			v := ptrToBool(p + uintptr(code.Offset))
+			if v {
+				b = appendStructKey(ctx, code, b)
+				b = appendBool(ctx, b, v)
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldBoolString:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			b = appendBool(ctx, b, ptrToBool(p+uintptr(code.Offset)))
+			b = append(b, '"')
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyBoolString:
+			p := load(ctxptr, code.Idx)
+			v := ptrToBool(p + uintptr(code.Offset))
+			if v {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendBool(ctx, b, v)
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldBoolPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			b = appendStructKey(ctx, code, b)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendBool(ctx, b, ptrToBool(p))
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyBoolPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendBool(ctx, b, ptrToBool(p))
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldBoolPtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			b = appendStructKey(ctx, code, b)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = append(b, '"')
+				b = appendBool(ctx, b, ptrToBool(p))
+				b = append(b, '"')
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyBoolPtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendBool(ctx, b, ptrToBool(p))
+				b = append(b, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldBytes:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = appendByteSlice(ctx, b, ptrToBytes(p+uintptr(code.Offset)))
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyBytes:
+			p := load(ctxptr, code.Idx)
+			v := ptrToBytes(p + uintptr(code.Offset))
+			if len(v) > 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendByteSlice(ctx, b, v)
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldBytesPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			b = appendStructKey(ctx, code, b)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendByteSlice(ctx, b, ptrToBytes(p))
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyBytesPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendByteSlice(ctx, b, ptrToBytes(p))
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldNumber:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			bb, err := appendNumber(ctx, b, ptrToNumber(p+uintptr(code.Offset)))
+			if err != nil {
+				return nil, err
+			}
+			b = appendComma(ctx, bb)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyNumber:
+			p := load(ctxptr, code.Idx)
+			v := ptrToNumber(p + uintptr(code.Offset))
+			if v != "" {
+				b = appendStructKey(ctx, code, b)
+				bb, err := appendNumber(ctx, b, v)
+				if err != nil {
+					return nil, err
+				}
+				b = appendComma(ctx, bb)
+			}
+			code = code.Next
+		case encoder.OpStructFieldNumberString:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			bb, err := appendNumber(ctx, b, ptrToNumber(p+uintptr(code.Offset)))
+			if err != nil {
+				return nil, err
+			}
+			b = append(bb, '"')
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyNumberString:
+			p := load(ctxptr, code.Idx)
+			v := ptrToNumber(p + uintptr(code.Offset))
+			if v != "" {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				bb, err := appendNumber(ctx, b, v)
+				if err != nil {
+					return nil, err
+				}
+				b = append(bb, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldNumberPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			b = appendStructKey(ctx, code, b)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				bb, err := appendNumber(ctx, b, ptrToNumber(p))
+				if err != nil {
+					return nil, err
+				}
+				b = bb
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyNumberPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				bb, err := appendNumber(ctx, b, ptrToNumber(p))
+				if err != nil {
+					return nil, err
+				}
+				b = appendComma(ctx, bb)
+			}
+			code = code.Next
+		case encoder.OpStructFieldNumberPtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			b = appendStructKey(ctx, code, b)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = append(b, '"')
+				bb, err := appendNumber(ctx, b, ptrToNumber(p))
+				if err != nil {
+					return nil, err
+				}
+				b = append(bb, '"')
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyNumberPtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				bb, err := appendNumber(ctx, b, ptrToNumber(p))
+				if err != nil {
+					return nil, err
+				}
+				b = append(bb, '"')
+				b = appendComma(ctx, b)
+			}
+			code = code.Next
+		case encoder.OpStructFieldMarshalJSON:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			p += uintptr(code.Offset)
+			if (code.Flags & encoder.IsNilableTypeFlags) != 0 {
+				p = ptrToPtr(p)
+			}
+			if p == 0 && (code.Flags&encoder.NilCheckFlags) != 0 {
+				b = appendNull(ctx, b)
+			} else {
+				bb, err := appendMarshalJSON(ctx, code, b, ptrToInterface(code, p))
+				if err != nil {
+					return nil, err
+				}
+				b = bb
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyMarshalJSON:
+			p := load(ctxptr, code.Idx)
+			p += uintptr(code.Offset)
+			if (code.Flags & encoder.IsNilableTypeFlags) != 0 {
+				p = ptrToPtr(p)
+			}
+			if p == 0 && (code.Flags&encoder.NilCheckFlags) != 0 {
+				code = code.NextField
+				break
+			}
+			iface := ptrToInterface(code, p)
+			if (code.Flags&encoder.NilCheckFlags) != 0 && encoder.IsNilForMarshaler(iface) {
+				code = code.NextField
+				break
+			}
+			b = appendStructKey(ctx, code, b)
+			bb, err := appendMarshalJSON(ctx, code, b, iface)
+			if err != nil {
+				return nil, err
+			}
+			b = appendComma(ctx, bb)
+			code = code.Next
+		case encoder.OpStructFieldMarshalJSONPtr:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				bb, err := appendMarshalJSON(ctx, code, b, ptrToInterface(code, p))
+				if err != nil {
+					return nil, err
+				}
+				b = bb
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyMarshalJSONPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				bb, err := appendMarshalJSON(ctx, code, b, ptrToInterface(code, p))
+				if err != nil {
+					return nil, err
+				}
+				b = appendComma(ctx, bb)
+			}
+			code = code.Next
+		case encoder.OpStructFieldMarshalText:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			p += uintptr(code.Offset)
+			if (code.Flags & encoder.IsNilableTypeFlags) != 0 {
+				p = ptrToPtr(p)
+			}
+			if p == 0 && (code.Flags&encoder.NilCheckFlags) != 0 {
+				b = appendNull(ctx, b)
+			} else {
+				bb, err := appendMarshalText(ctx, code, b, ptrToInterface(code, p))
+				if err != nil {
+					return nil, err
+				}
+				b = bb
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyMarshalText:
+			p := load(ctxptr, code.Idx)
+			p += uintptr(code.Offset)
+			if (code.Flags & encoder.IsNilableTypeFlags) != 0 {
+				p = ptrToPtr(p)
+			}
+			if p == 0 && (code.Flags&encoder.NilCheckFlags) != 0 {
+				code = code.NextField
+				break
+			}
+			b = appendStructKey(ctx, code, b)
+			bb, err := appendMarshalText(ctx, code, b, ptrToInterface(code, p))
+			if err != nil {
+				return nil, err
+			}
+			b = appendComma(ctx, bb)
+			code = code.Next
+		case encoder.OpStructFieldMarshalTextPtr:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				bb, err := appendMarshalText(ctx, code, b, ptrToInterface(code, p))
+				if err != nil {
+					return nil, err
+				}
+				b = bb
+			}
+			b = appendComma(ctx, b)
+			code = code.Next
+		case encoder.OpStructFieldOmitEmptyMarshalTextPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				bb, err := appendMarshalText(ctx, code, b, ptrToInterface(code, p))
+				if err != nil {
+					return nil, err
+				}
+				b = appendComma(ctx, bb)
+			}
+			code = code.Next
+		case encoder.OpStructFieldArray:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p += uintptr(code.Offset)
+			code = code.Next
+			store(ctxptr, code.Idx, p)
+		case encoder.OpStructFieldOmitEmptyArray:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p += uintptr(code.Offset)
+			code = code.Next
+			store(ctxptr, code.Idx, p)
+		case encoder.OpStructFieldArrayPtr:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			code = code.Next
+			store(ctxptr, code.Idx, p)
+		case encoder.OpStructFieldOmitEmptyArrayPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				code = code.Next
+				store(ctxptr, code.Idx, p)
+			} else {
+				code = code.NextField
+			}
+		case encoder.OpStructFieldSlice:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p += uintptr(code.Offset)
+			code = code.Next
+			store(ctxptr, code.Idx, p)
+		case encoder.OpStructFieldOmitEmptySlice:
+			p := load(ctxptr, code.Idx)
+			p += uintptr(code.Offset)
+			slice := ptrToSlice(p)
+			if slice.Len == 0 {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				code = code.Next
+				store(ctxptr, code.Idx, p)
+			}
+		case encoder.OpStructFieldSlicePtr:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			code = code.Next
+			store(ctxptr, code.Idx, p)
+		case encoder.OpStructFieldOmitEmptySlicePtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				code = code.Next
+				store(ctxptr, code.Idx, p)
+			} else {
+				code = code.NextField
+			}
+		case encoder.OpStructFieldMap:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToPtr(p + uintptr(code.Offset))
+			code = code.Next
+			store(ctxptr, code.Idx, p)
+		case encoder.OpStructFieldOmitEmptyMap:
+			p := load(ctxptr, code.Idx)
+			p = ptrToPtr(p + uintptr(code.Offset))
+			if p == 0 || maplen(ptrToUnsafePtr(p)) == 0 {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				code = code.Next
+				store(ctxptr, code.Idx, p)
+			}
+		case encoder.OpStructFieldMapPtr:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToPtr(p + uintptr(code.Offset))
+			if p != 0 {
+				p = ptrToNPtr(p, code.PtrNum)
+			}
+			code = code.Next
+			store(ctxptr, code.Idx, p)
+		case encoder.OpStructFieldOmitEmptyMapPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToPtr(p + uintptr(code.Offset))
+			if p != 0 {
+				p = ptrToNPtr(p, code.PtrNum)
+			}
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				code = code.Next
+				store(ctxptr, code.Idx, p)
+			} else {
+				code = code.NextField
+			}
+		case encoder.OpStructFieldStruct:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p += uintptr(code.Offset)
+			code = code.Next
+			store(ctxptr, code.Idx, p)
+		case encoder.OpStructFieldOmitEmptyStruct:
+			p := load(ctxptr, code.Idx)
+			p += uintptr(code.Offset)
+			if ptrToPtr(p) == 0 && (code.Flags&encoder.IsNextOpPtrTypeFlags) != 0 {
+				code = code.NextField
+			} else {
+				b = appendStructKey(ctx, code, b)
+				code = code.Next
+				store(ctxptr, code.Idx, p)
+			}
+		case encoder.OpStructEnd:
+			b = appendStructEndSkipLast(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndInt:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = appendInt(ctx, b, p+uintptr(code.Offset), code)
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyInt:
+			p := load(ctxptr, code.Idx)
+			u64 := ptrToUint64(p+uintptr(code.Offset), code.NumBitSize)
+			v := u64 & ((1 << code.NumBitSize) - 1)
+			if v != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendInt(ctx, b, p+uintptr(code.Offset), code)
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndIntString:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			b = appendInt(ctx, b, p+uintptr(code.Offset), code)
+			b = append(b, '"')
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyIntString:
+			p := load(ctxptr, code.Idx)
+			u64 := ptrToUint64(p+uintptr(code.Offset), code.NumBitSize)
+			v := u64 & ((1 << code.NumBitSize) - 1)
+			if v != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendInt(ctx, b, p+uintptr(code.Offset), code)
+				b = append(b, '"')
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndIntPtr:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendInt(ctx, b, p, code)
+			}
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyIntPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendInt(ctx, b, p, code)
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndIntPtrString:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = append(b, '"')
+				b = appendInt(ctx, b, p, code)
+				b = append(b, '"')
+			}
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyIntPtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendInt(ctx, b, p, code)
+				b = append(b, '"')
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndUint:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = appendUint(ctx, b, p+uintptr(code.Offset), code)
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyUint:
+			p := load(ctxptr, code.Idx)
+			u64 := ptrToUint64(p+uintptr(code.Offset), code.NumBitSize)
+			v := u64 & ((1 << code.NumBitSize) - 1)
+			if v != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendUint(ctx, b, p+uintptr(code.Offset), code)
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndUintString:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			b = appendUint(ctx, b, p+uintptr(code.Offset), code)
+			b = append(b, '"')
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyUintString:
+			p := load(ctxptr, code.Idx)
+			u64 := ptrToUint64(p+uintptr(code.Offset), code.NumBitSize)
+			v := u64 & ((1 << code.NumBitSize) - 1)
+			if v != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendUint(ctx, b, p+uintptr(code.Offset), code)
+				b = append(b, '"')
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndUintPtr:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendUint(ctx, b, p, code)
+			}
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyUintPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendUint(ctx, b, p, code)
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndUintPtrString:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = append(b, '"')
+				b = appendUint(ctx, b, p, code)
+				b = append(b, '"')
+			}
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyUintPtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendUint(ctx, b, p, code)
+				b = append(b, '"')
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndFloat32:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = appendFloat32(ctx, b, ptrToFloat32(p+uintptr(code.Offset)))
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyFloat32:
+			p := load(ctxptr, code.Idx)
+			v := ptrToFloat32(p + uintptr(code.Offset))
+			if v != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendFloat32(ctx, b, v)
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndFloat32String:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			b = appendFloat32(ctx, b, ptrToFloat32(p+uintptr(code.Offset)))
+			b = append(b, '"')
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyFloat32String:
+			p := load(ctxptr, code.Idx)
+			v := ptrToFloat32(p + uintptr(code.Offset))
+			if v != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendFloat32(ctx, b, v)
+				b = append(b, '"')
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndFloat32Ptr:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendFloat32(ctx, b, ptrToFloat32(p))
+			}
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyFloat32Ptr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendFloat32(ctx, b, ptrToFloat32(p))
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndFloat32PtrString:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = append(b, '"')
+				b = appendFloat32(ctx, b, ptrToFloat32(p))
+				b = append(b, '"')
+			}
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyFloat32PtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendFloat32(ctx, b, ptrToFloat32(p))
+				b = append(b, '"')
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndFloat64:
+			p := load(ctxptr, code.Idx)
+			v := ptrToFloat64(p + uintptr(code.Offset))
+			if math.IsInf(v, 0) || math.IsNaN(v) {
+				return nil, errUnsupportedFloat(v)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = appendFloat64(ctx, b, v)
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyFloat64:
+			p := load(ctxptr, code.Idx)
+			v := ptrToFloat64(p + uintptr(code.Offset))
+			if v != 0 {
+				if math.IsInf(v, 0) || math.IsNaN(v) {
+					return nil, errUnsupportedFloat(v)
+				}
+				b = appendStructKey(ctx, code, b)
+				b = appendFloat64(ctx, b, v)
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndFloat64String:
+			p := load(ctxptr, code.Idx)
+			v := ptrToFloat64(p + uintptr(code.Offset))
+			if math.IsInf(v, 0) || math.IsNaN(v) {
+				return nil, errUnsupportedFloat(v)
+			}
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			b = appendFloat64(ctx, b, v)
+			b = append(b, '"')
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyFloat64String:
+			p := load(ctxptr, code.Idx)
+			v := ptrToFloat64(p + uintptr(code.Offset))
+			if v != 0 {
+				if math.IsInf(v, 0) || math.IsNaN(v) {
+					return nil, errUnsupportedFloat(v)
+				}
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendFloat64(ctx, b, v)
+				b = append(b, '"')
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndFloat64Ptr:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+				b = appendStructEnd(ctx, code, b)
+				code = code.Next
+				break
+			}
+			v := ptrToFloat64(p)
+			if math.IsInf(v, 0) || math.IsNaN(v) {
+				return nil, errUnsupportedFloat(v)
+			}
+			b = appendFloat64(ctx, b, v)
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyFloat64Ptr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				v := ptrToFloat64(p)
+				if math.IsInf(v, 0) || math.IsNaN(v) {
+					return nil, errUnsupportedFloat(v)
+				}
+				b = appendFloat64(ctx, b, v)
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndFloat64PtrString:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = append(b, '"')
+				v := ptrToFloat64(p)
+				if math.IsInf(v, 0) || math.IsNaN(v) {
+					return nil, errUnsupportedFloat(v)
+				}
+				b = appendFloat64(ctx, b, v)
+				b = append(b, '"')
+			}
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyFloat64PtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				v := ptrToFloat64(p)
+				if math.IsInf(v, 0) || math.IsNaN(v) {
+					return nil, errUnsupportedFloat(v)
+				}
+				b = append(b, '"')
+				b = appendFloat64(ctx, b, v)
+				b = append(b, '"')
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndString:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = appendString(ctx, b, ptrToString(p+uintptr(code.Offset)))
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyString:
+			p := load(ctxptr, code.Idx)
+			v := ptrToString(p + uintptr(code.Offset))
+			if v != "" {
+				b = appendStructKey(ctx, code, b)
+				b = appendString(ctx, b, v)
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndStringString:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			s := ptrToString(p + uintptr(code.Offset))
+			b = appendString(ctx, b, string(appendString(ctx, []byte{}, s)))
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyStringString:
+			p := load(ctxptr, code.Idx)
+			v := ptrToString(p + uintptr(code.Offset))
+			if v != "" {
+				b = appendStructKey(ctx, code, b)
+				b = appendString(ctx, b, string(appendString(ctx, []byte{}, v)))
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndStringPtr:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendString(ctx, b, ptrToString(p))
+			}
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyStringPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendString(ctx, b, ptrToString(p))
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndStringPtrString:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendString(ctx, b, string(appendString(ctx, []byte{}, ptrToString(p))))
+			}
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyStringPtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendString(ctx, b, string(appendString(ctx, []byte{}, ptrToString(p))))
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndBool:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = appendBool(ctx, b, ptrToBool(p+uintptr(code.Offset)))
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyBool:
+			p := load(ctxptr, code.Idx)
+			v := ptrToBool(p + uintptr(code.Offset))
+			if v {
+				b = appendStructKey(ctx, code, b)
+				b = appendBool(ctx, b, v)
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndBoolString:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			b = appendBool(ctx, b, ptrToBool(p+uintptr(code.Offset)))
+			b = append(b, '"')
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyBoolString:
+			p := load(ctxptr, code.Idx)
+			v := ptrToBool(p + uintptr(code.Offset))
+			if v {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendBool(ctx, b, v)
+				b = append(b, '"')
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndBoolPtr:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendBool(ctx, b, ptrToBool(p))
+			}
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyBoolPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendBool(ctx, b, ptrToBool(p))
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndBoolPtrString:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = append(b, '"')
+				b = appendBool(ctx, b, ptrToBool(p))
+				b = append(b, '"')
+			}
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyBoolPtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				b = appendBool(ctx, b, ptrToBool(p))
+				b = append(b, '"')
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndBytes:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = appendByteSlice(ctx, b, ptrToBytes(p+uintptr(code.Offset)))
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyBytes:
+			p := load(ctxptr, code.Idx)
+			v := ptrToBytes(p + uintptr(code.Offset))
+			if len(v) > 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendByteSlice(ctx, b, v)
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndBytesPtr:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = appendByteSlice(ctx, b, ptrToBytes(p))
+			}
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyBytesPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = appendByteSlice(ctx, b, ptrToBytes(p))
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndNumber:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			bb, err := appendNumber(ctx, b, ptrToNumber(p+uintptr(code.Offset)))
+			if err != nil {
+				return nil, err
+			}
+			b = appendStructEnd(ctx, code, bb)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyNumber:
+			p := load(ctxptr, code.Idx)
+			v := ptrToNumber(p + uintptr(code.Offset))
+			if v != "" {
+				b = appendStructKey(ctx, code, b)
+				bb, err := appendNumber(ctx, b, v)
+				if err != nil {
+					return nil, err
+				}
+				b = appendStructEnd(ctx, code, bb)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndNumberString:
+			p := load(ctxptr, code.Idx)
+			b = appendStructKey(ctx, code, b)
+			b = append(b, '"')
+			bb, err := appendNumber(ctx, b, ptrToNumber(p+uintptr(code.Offset)))
+			if err != nil {
+				return nil, err
+			}
+			b = append(bb, '"')
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyNumberString:
+			p := load(ctxptr, code.Idx)
+			v := ptrToNumber(p + uintptr(code.Offset))
+			if v != "" {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				bb, err := appendNumber(ctx, b, v)
+				if err != nil {
+					return nil, err
+				}
+				b = append(bb, '"')
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndNumberPtr:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				bb, err := appendNumber(ctx, b, ptrToNumber(p))
+				if err != nil {
+					return nil, err
+				}
+				b = bb
+			}
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyNumberPtr:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				bb, err := appendNumber(ctx, b, ptrToNumber(p))
+				if err != nil {
+					return nil, err
+				}
+				b = appendStructEnd(ctx, code, bb)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpStructEndNumberPtrString:
+			b = appendStructKey(ctx, code, b)
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p == 0 {
+				b = appendNull(ctx, b)
+			} else {
+				b = append(b, '"')
+				bb, err := appendNumber(ctx, b, ptrToNumber(p))
+				if err != nil {
+					return nil, err
+				}
+				b = append(bb, '"')
+			}
+			b = appendStructEnd(ctx, code, b)
+			code = code.Next
+		case encoder.OpStructEndOmitEmptyNumberPtrString:
+			p := load(ctxptr, code.Idx)
+			p = ptrToNPtr(p+uintptr(code.Offset), code.PtrNum)
+			if p != 0 {
+				b = appendStructKey(ctx, code, b)
+				b = append(b, '"')
+				bb, err := appendNumber(ctx, b, ptrToNumber(p))
+				if err != nil {
+					return nil, err
+				}
+				b = append(bb, '"')
+				b = appendStructEnd(ctx, code, b)
+			} else {
+				b = appendStructEndSkipLast(ctx, code, b)
+			}
+			code = code.Next
+		case encoder.OpEnd:
+			goto END
+		}
+	}
+END:
+	return b, nil
+}
diff --git a/vendor/github.com/goccy/go-json/internal/errors/error.go b/vendor/github.com/goccy/go-json/internal/errors/error.go
new file mode 100644
index 00000000..9207d0ff
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/internal/errors/error.go
@@ -0,0 +1,183 @@
+package errors
+
+import (
+	"fmt"
+	"reflect"
+	"strconv"
+)
+
+type InvalidUTF8Error struct {
+	S string // the complete string value that triggered the error
+}
+
+func (e *InvalidUTF8Error) Error() string { // message embeds S in Go-quoted form
+	return fmt.Sprint("json: invalid UTF-8 in string: ", strconv.Quote(e.S))
+}
+
+type InvalidUnmarshalError struct {
+	Type reflect.Type // type named in the message; nil yields "json: Unmarshal(nil)"
+}
+
+func (e *InvalidUnmarshalError) Error() string {
+	// Three message forms: no type, a non-pointer type, otherwise the "nil <type>" form.
+	switch {
+	case e.Type == nil:
+		return "json: Unmarshal(nil)"
+	case e.Type.Kind() != reflect.Ptr:
+		return fmt.Sprintf("json: Unmarshal(non-pointer %s)", e.Type)
+	}
+	return fmt.Sprintf("json: Unmarshal(nil %s)", e.Type)
+}
+
+// A MarshalerError reports a failure returned by a MarshalJSON or MarshalText method.
+type MarshalerError struct {
+	Type       reflect.Type // type named in the message
+	Err        error        // underlying error; returned by Unwrap
+	sourceFunc string       // method name for the message; "" means "MarshalJSON"
+}
+
+func (e *MarshalerError) Error() string {
+	method := "MarshalJSON" // default when no source function was recorded
+	if e.sourceFunc != "" {
+		method = e.sourceFunc
+	}
+	return fmt.Sprintf("json: error calling %s for type %s: %s", method, e.Type, e.Err.Error())
+}
+
+// Unwrap returns Err, the wrapped error.
+func (e *MarshalerError) Unwrap() error { return e.Err }
+
+// A SyntaxError describes a JSON syntax error: a message plus the input offset it refers to.
+type SyntaxError struct {
+	msg    string // description of error
+	Offset int64  // error occurred after reading Offset bytes
+}
+
+func (e *SyntaxError) Error() string { return e.msg } // returns the stored description unchanged
+
+// An UnmarshalFieldError describes a JSON object key that
+// led to an unexported (and therefore unwritable) struct field.
+//
+// Deprecated: No longer used; kept for compatibility.
+type UnmarshalFieldError struct {
+	Key   string              // JSON object key; Error prints it quoted
+	Type  reflect.Type        // type named at the end of the message
+	Field reflect.StructField // field whose Name Error reports
+}
+
+func (e *UnmarshalFieldError) Error() string {
+	const format = "json: cannot unmarshal object key %s into unexported field %s of type %s"
+	quotedKey := strconv.Quote(e.Key)
+	return fmt.Sprintf(format, quotedKey, e.Field.Name, e.Type.String())
+}
+
+// An UnmarshalTypeError describes a JSON value that could not be
+// stored in a value of a specific Go type.
+type UnmarshalTypeError struct {
+	Value  string       // description of JSON value - "bool", "array", "number -5"
+	Type   reflect.Type // type of Go value it could not be assigned to
+	Offset int64        // error occurred after reading Offset bytes
+	Struct string       // name of the struct type containing the field
+	Field  string       // the full path from root node to the field
+}
+
+func (e *UnmarshalTypeError) Error() string {
+	if e.Struct == "" && e.Field == "" { // no struct context recorded
+		return fmt.Sprintf("json: cannot unmarshal %s into Go value of type %s", e.Value, e.Type)
+	}
+	return fmt.Sprintf("json: cannot unmarshal %s into Go struct field %s.%s of type %s",
+		e.Value, e.Struct, e.Field, e.Type,
+	)
+}
+
+// An UnsupportedTypeError is returned by Marshal when asked to encode an unsupported value type.
+type UnsupportedTypeError struct {
+	Type reflect.Type // the unsupported type, printed by Error
+}
+
+func (e *UnsupportedTypeError) Error() string {
+	const format = "json: unsupported type: %s"
+	return fmt.Sprintf(format, e.Type)
+}
+
+type UnsupportedValueError struct {
+	Value reflect.Value // not read by Error
+	Str   string        // text that Error appends to its message
+}
+
+func (e *UnsupportedValueError) Error() string {
+	return fmt.Sprint("json: unsupported value: ", e.Str)
+}
+
+func ErrSyntax(msg string, offset int64) *SyntaxError { // msg is stored as the error text, offset as Offset
+	return &SyntaxError{msg: msg, Offset: offset}
+}
+
+// ErrMarshaler wraps err in a *MarshalerError for typ; msg is stored as the
+// name of the method reported by Error (empty means "MarshalJSON").
+func ErrMarshaler(typ reflect.Type, err error, msg string) *MarshalerError {
+	merr := &MarshalerError{Type: typ, Err: err}
+	merr.sourceFunc = msg
+	return merr
+}
+
+// ErrExceededMaxDepth returns a *SyntaxError at offset cursor naming c as the
+// character at which the max depth was exceeded.
+func ErrExceededMaxDepth(c byte, cursor int64) *SyntaxError {
+	text := fmt.Sprintf(`invalid character "%c" exceeded max depth`, c)
+	return &SyntaxError{msg: text, Offset: cursor}
+}
+
+func ErrNotAtBeginningOfValue(cursor int64) *SyntaxError { // fixed message; cursor is stored as Offset
+	return &SyntaxError{msg: "not at beginning of value", Offset: cursor}
+}
+
+// ErrUnexpectedEndOfJSON returns a *SyntaxError at offset cursor whose text
+// is "json: <msg> unexpected end of JSON input".
+func ErrUnexpectedEndOfJSON(msg string, cursor int64) *SyntaxError {
+	text := fmt.Sprintf("json: %s unexpected end of JSON input", msg)
+	return &SyntaxError{msg: text, Offset: cursor}
+}
+
+func ErrExpected(msg string, cursor int64) *SyntaxError { // text is "expected " followed by msg
+	return &SyntaxError{msg: fmt.Sprint("expected ", msg), Offset: cursor}
+}
+
+// ErrInvalidCharacter returns a *SyntaxError at offset cursor reporting an
+// invalid character found as context. The character itself is printed
+// unless c is 0, in which case only the context appears.
+func ErrInvalidCharacter(c byte, context string, cursor int64) *SyntaxError {
+	var text string
+	if c == 0 {
+		text = fmt.Sprintf("json: invalid character as %s", context)
+	} else {
+		text = fmt.Sprintf("json: invalid character %c as %s", c, context)
+	}
+	return &SyntaxError{msg: text, Offset: cursor}
+}
+
+// ErrInvalidBeginningOfValue returns a *SyntaxError at offset cursor saying
+// that c was found where a value was expected to begin.
+func ErrInvalidBeginningOfValue(c byte, cursor int64) *SyntaxError {
+	text := fmt.Sprintf("invalid character '%c' looking for beginning of value", c)
+	return &SyntaxError{msg: text, Offset: cursor}
+}
+
+type PathError struct {
+	msg string // detail appended to the fixed "invalid path format" prefix
+}
+
+func (e *PathError) Error() string {
+	return fmt.Sprint("json: invalid path format: ", e.msg)
+}
+
+func ErrInvalidPath(msg string, args ...interface{}) *PathError {
+	if len(args) == 0 {
+		return &PathError{msg: msg} // no args: msg is used verbatim, never as a format
+	}
+	return &PathError{msg: fmt.Sprintf(msg, args...)}
+}
+
+func ErrEmptyPath() *PathError { // fixed "path is empty" detail
+	return &PathError{msg: "path is empty"}
+}
diff --git a/vendor/github.com/goccy/go-json/internal/runtime/rtype.go b/vendor/github.com/goccy/go-json/internal/runtime/rtype.go
new file mode 100644
index 00000000..37cfe35a
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/internal/runtime/rtype.go
@@ -0,0 +1,262 @@
+package runtime
+
+import (
+	"reflect"
+	"unsafe"
+)
+
+// Type stands in for reflect.rtype: each method below forwards to a bodyless stub that a go:linkname directive binds to the matching reflect.(*rtype) method and that is marked go:noescape.
+type Type struct{}
+
+//go:linkname rtype_Align reflect.(*rtype).Align
+//go:noescape
+func rtype_Align(*Type) int
+
+func (t *Type) Align() int {
+	return rtype_Align(t)
+}
+
+//go:linkname rtype_FieldAlign reflect.(*rtype).FieldAlign
+//go:noescape
+func rtype_FieldAlign(*Type) int
+
+func (t *Type) FieldAlign() int {
+	return rtype_FieldAlign(t)
+}
+
+//go:linkname rtype_Method reflect.(*rtype).Method
+//go:noescape
+func rtype_Method(*Type, int) reflect.Method
+
+func (t *Type) Method(a0 int) reflect.Method {
+	return rtype_Method(t, a0)
+}
+
+//go:linkname rtype_MethodByName reflect.(*rtype).MethodByName
+//go:noescape
+func rtype_MethodByName(*Type, string) (reflect.Method, bool)
+
+func (t *Type) MethodByName(a0 string) (reflect.Method, bool) {
+	return rtype_MethodByName(t, a0)
+}
+
+//go:linkname rtype_NumMethod reflect.(*rtype).NumMethod
+//go:noescape
+func rtype_NumMethod(*Type) int
+
+func (t *Type) NumMethod() int {
+	return rtype_NumMethod(t)
+}
+
+//go:linkname rtype_Name reflect.(*rtype).Name
+//go:noescape
+func rtype_Name(*Type) string
+
+func (t *Type) Name() string {
+	return rtype_Name(t)
+}
+
+//go:linkname rtype_PkgPath reflect.(*rtype).PkgPath
+//go:noescape
+func rtype_PkgPath(*Type) string
+
+func (t *Type) PkgPath() string {
+	return rtype_PkgPath(t)
+}
+
+//go:linkname rtype_Size reflect.(*rtype).Size
+//go:noescape
+func rtype_Size(*Type) uintptr
+
+func (t *Type) Size() uintptr {
+	return rtype_Size(t)
+}
+
+//go:linkname rtype_String reflect.(*rtype).String
+//go:noescape
+func rtype_String(*Type) string
+
+func (t *Type) String() string {
+	return rtype_String(t)
+}
+
+//go:linkname rtype_Kind reflect.(*rtype).Kind
+//go:noescape
+func rtype_Kind(*Type) reflect.Kind
+
+func (t *Type) Kind() reflect.Kind {
+	return rtype_Kind(t)
+}
+
+//go:linkname rtype_Implements reflect.(*rtype).Implements
+//go:noescape
+func rtype_Implements(*Type, reflect.Type) bool
+
+func (t *Type) Implements(u reflect.Type) bool {
+	return rtype_Implements(t, u)
+}
+
+//go:linkname rtype_AssignableTo reflect.(*rtype).AssignableTo
+//go:noescape
+func rtype_AssignableTo(*Type, reflect.Type) bool
+
+func (t *Type) AssignableTo(u reflect.Type) bool {
+	return rtype_AssignableTo(t, u)
+}
+
+//go:linkname rtype_ConvertibleTo reflect.(*rtype).ConvertibleTo
+//go:noescape
+func rtype_ConvertibleTo(*Type, reflect.Type) bool
+
+func (t *Type) ConvertibleTo(u reflect.Type) bool {
+	return rtype_ConvertibleTo(t, u)
+}
+
+//go:linkname rtype_Comparable reflect.(*rtype).Comparable
+//go:noescape
+func rtype_Comparable(*Type) bool
+
+func (t *Type) Comparable() bool {
+	return rtype_Comparable(t)
+}
+
+//go:linkname rtype_Bits reflect.(*rtype).Bits
+//go:noescape
+func rtype_Bits(*Type) int
+
+func (t *Type) Bits() int {
+	return rtype_Bits(t)
+}
+
+//go:linkname rtype_ChanDir reflect.(*rtype).ChanDir
+//go:noescape
+func rtype_ChanDir(*Type) reflect.ChanDir
+
+func (t *Type) ChanDir() reflect.ChanDir {
+	return rtype_ChanDir(t)
+}
+
+//go:linkname rtype_IsVariadic reflect.(*rtype).IsVariadic
+//go:noescape
+func rtype_IsVariadic(*Type) bool
+
+func (t *Type) IsVariadic() bool {
+	return rtype_IsVariadic(t)
+}
+
+//go:linkname rtype_Elem reflect.(*rtype).Elem
+//go:noescape
+func rtype_Elem(*Type) reflect.Type
+
+func (t *Type) Elem() *Type { // unlike most methods here, converts the stub's reflect.Type result to *Type via Type2RType
+	return Type2RType(rtype_Elem(t))
+}
+
+//go:linkname rtype_Field reflect.(*rtype).Field
+//go:noescape
+func rtype_Field(*Type, int) reflect.StructField
+
+func (t *Type) Field(i int) reflect.StructField {
+	return rtype_Field(t, i)
+}
+
+//go:linkname rtype_FieldByIndex reflect.(*rtype).FieldByIndex
+//go:noescape
+func rtype_FieldByIndex(*Type, []int) reflect.StructField
+
+func (t *Type) FieldByIndex(index []int) reflect.StructField {
+	return rtype_FieldByIndex(t, index)
+}
+
+//go:linkname rtype_FieldByName reflect.(*rtype).FieldByName
+//go:noescape
+func rtype_FieldByName(*Type, string) (reflect.StructField, bool)
+
+func (t *Type) FieldByName(name string) (reflect.StructField, bool) {
+	return rtype_FieldByName(t, name)
+}
+
+//go:linkname rtype_FieldByNameFunc reflect.(*rtype).FieldByNameFunc
+//go:noescape
+func rtype_FieldByNameFunc(*Type, func(string) bool) (reflect.StructField, bool)
+
+func (t *Type) FieldByNameFunc(match func(string) bool) (reflect.StructField, bool) {
+	return rtype_FieldByNameFunc(t, match)
+}
+
+//go:linkname rtype_In reflect.(*rtype).In
+//go:noescape
+func rtype_In(*Type, int) reflect.Type
+
+func (t *Type) In(i int) reflect.Type {
+	return rtype_In(t, i)
+}
+
+//go:linkname rtype_Key reflect.(*rtype).Key
+//go:noescape
+func rtype_Key(*Type) reflect.Type
+
+func (t *Type) Key() *Type { // like Elem, converts the stub's reflect.Type result to *Type via Type2RType
+	return Type2RType(rtype_Key(t))
+}
+
+//go:linkname rtype_Len reflect.(*rtype).Len
+//go:noescape
+func rtype_Len(*Type) int
+
+func (t *Type) Len() int {
+	return rtype_Len(t)
+}
+
+//go:linkname rtype_NumField reflect.(*rtype).NumField
+//go:noescape
+func rtype_NumField(*Type) int
+
+func (t *Type) NumField() int {
+	return rtype_NumField(t)
+}
+
+//go:linkname rtype_NumIn reflect.(*rtype).NumIn
+//go:noescape
+func rtype_NumIn(*Type) int
+
+func (t *Type) NumIn() int {
+	return rtype_NumIn(t)
+}
+
+//go:linkname rtype_NumOut reflect.(*rtype).NumOut
+//go:noescape
+func rtype_NumOut(*Type) int
+
+func (t *Type) NumOut() int {
+	return rtype_NumOut(t)
+}
+
+//go:linkname rtype_Out reflect.(*rtype).Out
+//go:noescape
+func rtype_Out(*Type, int) reflect.Type
+
+//go:linkname PtrTo reflect.(*rtype).ptrTo
+//go:noescape
+func PtrTo(*Type) *Type // exported package-level function (not a method), bound to reflect.(*rtype).ptrTo by the directive above
+
+func (t *Type) Out(i int) reflect.Type {
+	return rtype_Out(t, i)
+}
+
+//go:linkname IfaceIndir reflect.ifaceIndir
+//go:noescape
+func IfaceIndir(*Type) bool // bodyless: bound to reflect.ifaceIndir by the go:linkname directive above
+
+//go:linkname RType2Type reflect.toType
+//go:noescape
+func RType2Type(t *Type) reflect.Type // bodyless: bound to reflect.toType by the go:linkname directive above
+
+type emptyInterface struct { // two-word overlay that Type2RType lays over an interface value
+	_   *Type          // first word; not read here
+	ptr unsafe.Pointer // second word; Type2RType returns it as *Type
+}
+
+func Type2RType(t reflect.Type) *Type { // reinterprets t's second word as *Type; assumes the two-word interface layout — TODO confirm per Go version
+	return (*Type)(((*emptyInterface)(unsafe.Pointer(&t))).ptr)
+}
diff --git a/vendor/github.com/goccy/go-json/internal/runtime/struct_field.go b/vendor/github.com/goccy/go-json/internal/runtime/struct_field.go
new file mode 100644
index 00000000..baab0c59
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/internal/runtime/struct_field.go
@@ -0,0 +1,91 @@
+package runtime
+
+import (
+	"reflect"
+	"strings"
+	"unicode"
+)
+
+func getTag(field reflect.StructField) string {
+	return field.Tag.Get("json")
+}
+
+func IsIgnoredStructField(field reflect.StructField) bool {
+	if field.PkgPath != "" {
+		if field.Anonymous {
+			t := field.Type
+			if t.Kind() == reflect.Ptr {
+				t = t.Elem()
+			}
+			if t.Kind() != reflect.Struct {
+				return true
+			}
+		} else {
+			// private field
+			return true
+		}
+	}
+	tag := getTag(field)
+	return tag == "-"
+}
+
+type StructTag struct {
+	Key         string
+	IsTaggedKey bool
+	IsOmitEmpty bool
+	IsString    bool
+	Field       reflect.StructField
+}
+
+type StructTags []*StructTag
+
+func (t StructTags) ExistsKey(key string) bool {
+	for _, tt := range t {
+		if tt.Key == key {
+			return true
+		}
+	}
+	return false
+}
+
+func isValidTag(s string) bool {
+	if s == "" {
+		return false
+	}
+	for _, c := range s {
+		switch {
+		case strings.ContainsRune("!#$%&()*+-./:<=>?@[]^_{|}~ ", c):
+			// Backslash and quote chars are reserved, but
+			// otherwise any punctuation chars are allowed
+			// in a tag name.
+		case !unicode.IsLetter(c) && !unicode.IsDigit(c):
+			return false
+		}
+	}
+	return true
+}
+
+func StructTagFromField(field reflect.StructField) *StructTag {
+	keyName := field.Name
+	tag := getTag(field)
+	st := &StructTag{Field: field}
+	opts := strings.Split(tag, ",")
+	if len(opts) > 0 {
+		if opts[0] != "" && isValidTag(opts[0]) {
+			keyName = opts[0]
+			st.IsTaggedKey = true
+		}
+	}
+	st.Key = keyName
+	if len(opts) > 1 {
+		for _, opt := range opts[1:] {
+			switch opt {
+			case "omitempty":
+				st.IsOmitEmpty = true
+			case "string":
+				st.IsString = true
+			}
+		}
+	}
+	return st
+}
diff --git a/vendor/github.com/goccy/go-json/internal/runtime/type.go b/vendor/github.com/goccy/go-json/internal/runtime/type.go
new file mode 100644
index 00000000..0167cd2c
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/internal/runtime/type.go
@@ -0,0 +1,100 @@
+package runtime
+
+import (
+	"reflect"
+	"unsafe"
+)
+
+type SliceHeader struct {
+	Data unsafe.Pointer
+	Len  int
+	Cap  int
+}
+
+const (
+	maxAcceptableTypeAddrRange = 1024 * 1024 * 2 // 2 Mib
+)
+
+type TypeAddr struct {
+	BaseTypeAddr uintptr
+	MaxTypeAddr  uintptr
+	AddrRange    uintptr
+	AddrShift    uintptr
+}
+
+var (
+	typeAddr        *TypeAddr
+	alreadyAnalyzed bool
+)
+
+//go:linkname typelinks reflect.typelinks
+func typelinks() ([]unsafe.Pointer, [][]int32)
+
+//go:linkname rtypeOff reflect.rtypeOff
+func rtypeOff(unsafe.Pointer, int32) unsafe.Pointer
+
+func AnalyzeTypeAddr() *TypeAddr {
+	defer func() {
+		alreadyAnalyzed = true
+	}()
+	if alreadyAnalyzed {
+		return typeAddr
+	}
+	sections, offsets := typelinks()
+	if len(sections) != 1 {
+		return nil
+	}
+	if len(offsets) != 1 {
+		return nil
+	}
+	section := sections[0]
+	offset := offsets[0]
+	var (
+		min         uintptr = uintptr(^uint(0))
+		max         uintptr = 0
+		isAligned64         = true
+		isAligned32         = true
+	)
+	for i := 0; i < len(offset); i++ {
+		typ := (*Type)(rtypeOff(section, offset[i]))
+		addr := uintptr(unsafe.Pointer(typ))
+		if min > addr {
+			min = addr
+		}
+		if max < addr {
+			max = addr
+		}
+		if typ.Kind() == reflect.Ptr {
+			addr = uintptr(unsafe.Pointer(typ.Elem()))
+			if min > addr {
+				min = addr
+			}
+			if max < addr {
+				max = addr
+			}
+		}
+		isAligned64 = isAligned64 && (addr-min)&63 == 0
+		isAligned32 = isAligned32 && (addr-min)&31 == 0
+	}
+	addrRange := max - min
+	if addrRange == 0 {
+		return nil
+	}
+	var addrShift uintptr
+	if isAligned64 {
+		addrShift = 6
+	} else if isAligned32 {
+		addrShift = 5
+	}
+	cacheSize := addrRange >> addrShift
+	if cacheSize > maxAcceptableTypeAddrRange {
+		return nil
+	}
+	typeAddr = &TypeAddr{
+		BaseTypeAddr: min,
+		MaxTypeAddr:  max,
+		AddrRange:    addrRange,
+		AddrShift:    addrShift,
+	}
+	return typeAddr
+}
diff --git a/vendor/github.com/goccy/go-json/json.go b/vendor/github.com/goccy/go-json/json.go
new file mode 100644
index 00000000..fb18065a
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/json.go
@@ -0,0 +1,368 @@
+package json
+
+import (
+	"bytes"
+	"context"
+	"encoding/json"
+
+	"github.com/goccy/go-json/internal/encoder"
+)
+
+// Marshaler is the interface implemented by types that
+// can marshal themselves into valid JSON.
+type Marshaler interface {
+	MarshalJSON() ([]byte, error)
+}
+
+// MarshalerContext is the interface implemented by types that
+// can marshal themselves into valid JSON with context.Context.
+type MarshalerContext interface {
+	MarshalJSON(context.Context) ([]byte, error)
+}
+
+// Unmarshaler is the interface implemented by types
+// that can unmarshal a JSON description of themselves.
+// The input can be assumed to be a valid encoding of
+// a JSON value. UnmarshalJSON must copy the JSON data
+// if it wishes to retain the data after returning.
+//
+// By convention, to approximate the behavior of Unmarshal itself,
+// Unmarshalers implement UnmarshalJSON([]byte("null")) as a no-op.
+type Unmarshaler interface {
+	UnmarshalJSON([]byte) error
+}
+
+// UnmarshalerContext is the interface implemented by types
+// that can unmarshal with context.Context a JSON description of themselves.
+type UnmarshalerContext interface {
+	UnmarshalJSON(context.Context, []byte) error
+}
+
+// Marshal returns the JSON encoding of v.
+//
+// Marshal traverses the value v recursively.
+// If an encountered value implements the Marshaler interface
+// and is not a nil pointer, Marshal calls its MarshalJSON method
+// to produce JSON. If no MarshalJSON method is present but the
+// value implements encoding.TextMarshaler instead, Marshal calls
+// its MarshalText method and encodes the result as a JSON string.
+// The nil pointer exception is not strictly necessary
+// but mimics a similar, necessary exception in the behavior of
+// UnmarshalJSON.
+//
+// Otherwise, Marshal uses the following type-dependent default encodings:
+//
+// Boolean values encode as JSON booleans.
+//
+// Floating point, integer, and Number values encode as JSON numbers.
+//
+// String values encode as JSON strings coerced to valid UTF-8,
+// replacing invalid bytes with the Unicode replacement rune.
+// The angle brackets "<" and ">" are escaped to "\u003c" and "\u003e"
+// to keep some browsers from misinterpreting JSON output as HTML.
+// Ampersand "&" is also escaped to "\u0026" for the same reason.
+// This escaping can be disabled using an Encoder that had SetEscapeHTML(false)
+// called on it.
+//
+// Array and slice values encode as JSON arrays, except that
+// []byte encodes as a base64-encoded string, and a nil slice
+// encodes as the null JSON value.
+//
+// Struct values encode as JSON objects.
+// Each exported struct field becomes a member of the object, using the
+// field name as the object key, unless the field is omitted for one of the
+// reasons given below.
+//
+// The encoding of each struct field can be customized by the format string
+// stored under the "json" key in the struct field's tag.
+// The format string gives the name of the field, possibly followed by a
+// comma-separated list of options. The name may be empty in order to
+// specify options without overriding the default field name.
+//
+// The "omitempty" option specifies that the field should be omitted
+// from the encoding if the field has an empty value, defined as
+// false, 0, a nil pointer, a nil interface value, and any empty array,
+// slice, map, or string.
+//
+// As a special case, if the field tag is "-", the field is always omitted.
+// Note that a field with name "-" can still be generated using the tag "-,".
+//
+// Examples of struct field tags and their meanings:
+//
+//	// Field appears in JSON as key "myName".
+//	Field int `json:"myName"`
+//
+//	// Field appears in JSON as key "myName" and
+//	// the field is omitted from the object if its value is empty,
+//	// as defined above.
+//	Field int `json:"myName,omitempty"`
+//
+//	// Field appears in JSON as key "Field" (the default), but
+//	// the field is skipped if empty.
+//	// Note the leading comma.
+//	Field int `json:",omitempty"`
+//
+//	// Field is ignored by this package.
+//	Field int `json:"-"`
+//
+//	// Field appears in JSON as key "-".
+//	Field int `json:"-,"`
+//
+// The "string" option signals that a field is stored as JSON inside a
+// JSON-encoded string. It applies only to fields of string, floating point,
+// integer, or boolean types. This extra level of encoding is sometimes used
+// when communicating with JavaScript programs:
+//
+//	Int64String int64 `json:",string"`
+//
+// The key name will be used if it's a non-empty string consisting of
+// only Unicode letters, digits, and ASCII punctuation except quotation
+// marks, backslash, and comma.
+//
+// Anonymous struct fields are usually marshaled as if their inner exported fields
+// were fields in the outer struct, subject to the usual Go visibility rules amended
+// as described in the next paragraph.
+// An anonymous struct field with a name given in its JSON tag is treated as
+// having that name, rather than being anonymous.
+// An anonymous struct field of interface type is treated the same as having
+// that type as its name, rather than being anonymous.
+//
+// The Go visibility rules for struct fields are amended for JSON when
+// deciding which field to marshal or unmarshal. If there are
+// multiple fields at the same level, and that level is the least
+// nested (and would therefore be the nesting level selected by the
+// usual Go rules), the following extra rules apply:
+//
+// 1) Of those fields, if any are JSON-tagged, only tagged fields are considered,
+// even if there are multiple untagged fields that would otherwise conflict.
+//
+// 2) If there is exactly one field (tagged or not according to the first rule), that is selected.
+//
+// 3) Otherwise there are multiple fields, and all are ignored; no error occurs.
+//
+// Handling of anonymous struct fields is new in Go 1.1.
+// Prior to Go 1.1, anonymous struct fields were ignored. To force ignoring of
+// an anonymous struct field in both current and earlier versions, give the field
+// a JSON tag of "-".
+//
+// Map values encode as JSON objects. The map's key type must either be a
+// string, an integer type, or implement encoding.TextMarshaler. The map keys
+// are sorted and used as JSON object keys by applying the following rules,
+// subject to the UTF-8 coercion described for string values above:
+//   - string keys are used directly
+//   - encoding.TextMarshalers are marshaled
+//   - integer keys are converted to strings
+//
+// Pointer values encode as the value pointed to.
+// A nil pointer encodes as the null JSON value.
+//
+// Interface values encode as the value contained in the interface.
+// A nil interface value encodes as the null JSON value.
+//
+// Channel, complex, and function values cannot be encoded in JSON.
+// Attempting to encode such a value causes Marshal to return
+// an UnsupportedTypeError.
+//
+// JSON cannot represent cyclic data structures and Marshal does not
+// handle them. Passing cyclic structures to Marshal will result in
+// an infinite recursion.
+func Marshal(v interface{}) ([]byte, error) {
+	return MarshalWithOption(v)
+}
+
+// MarshalNoEscape returns the JSON encoding of v and doesn't escape v.
+func MarshalNoEscape(v interface{}) ([]byte, error) {
+	return marshalNoEscape(v)
+}
+
+// MarshalContext returns the JSON encoding of v with context.Context and EncodeOption.
+func MarshalContext(ctx context.Context, v interface{}, optFuncs ...EncodeOptionFunc) ([]byte, error) {
+	return marshalContext(ctx, v, optFuncs...)
+}
+
+// MarshalWithOption returns the JSON encoding of v with EncodeOption.
+func MarshalWithOption(v interface{}, optFuncs ...EncodeOptionFunc) ([]byte, error) {
+	return marshal(v, optFuncs...)
+}
+
+// MarshalIndent is like Marshal but applies Indent to format the output.
+// Each JSON element in the output will begin on a new line beginning with prefix
+// followed by one or more copies of indent according to the indentation nesting.
+func MarshalIndent(v interface{}, prefix, indent string) ([]byte, error) {
+	return MarshalIndentWithOption(v, prefix, indent)
+}
+
+// MarshalIndentWithOption is like Marshal but applies Indent to format the output with EncodeOption.
+func MarshalIndentWithOption(v interface{}, prefix, indent string, optFuncs ...EncodeOptionFunc) ([]byte, error) {
+	return marshalIndent(v, prefix, indent, optFuncs...)
+}
+
+// Unmarshal parses the JSON-encoded data and stores the result
+// in the value pointed to by v. If v is nil or not a pointer,
+// Unmarshal returns an InvalidUnmarshalError.
+//
+// Unmarshal uses the inverse of the encodings that
+// Marshal uses, allocating maps, slices, and pointers as necessary,
+// with the following additional rules:
+//
+// To unmarshal JSON into a pointer, Unmarshal first handles the case of
+// the JSON being the JSON literal null. In that case, Unmarshal sets
+// the pointer to nil. Otherwise, Unmarshal unmarshals the JSON into
+// the value pointed at by the pointer. If the pointer is nil, Unmarshal
+// allocates a new value for it to point to.
+//
+// To unmarshal JSON into a value implementing the Unmarshaler interface,
+// Unmarshal calls that value's UnmarshalJSON method, including
+// when the input is a JSON null.
+// Otherwise, if the value implements encoding.TextUnmarshaler
+// and the input is a JSON quoted string, Unmarshal calls that value's
+// UnmarshalText method with the unquoted form of the string.
+//
+// To unmarshal JSON into a struct, Unmarshal matches incoming object
+// keys to the keys used by Marshal (either the struct field name or its tag),
+// preferring an exact match but also accepting a case-insensitive match. By
+// default, object keys which don't have a corresponding struct field are
+// ignored (see Decoder.DisallowUnknownFields for an alternative).
+//
+// To unmarshal JSON into an interface value,
+// Unmarshal stores one of these in the interface value:
+//
+//	bool, for JSON booleans
+//	float64, for JSON numbers
+//	string, for JSON strings
+//	[]interface{}, for JSON arrays
+//	map[string]interface{}, for JSON objects
+//	nil for JSON null
+//
+// To unmarshal a JSON array into a slice, Unmarshal resets the slice length
+// to zero and then appends each element to the slice.
+// As a special case, to unmarshal an empty JSON array into a slice,
+// Unmarshal replaces the slice with a new empty slice.
+//
+// To unmarshal a JSON array into a Go array, Unmarshal decodes
+// JSON array elements into corresponding Go array elements.
+// If the Go array is smaller than the JSON array,
+// the additional JSON array elements are discarded.
+// If the JSON array is smaller than the Go array,
+// the additional Go array elements are set to zero values.
+//
+// To unmarshal a JSON object into a map, Unmarshal first establishes a map to
+// use. If the map is nil, Unmarshal allocates a new map. Otherwise Unmarshal
+// reuses the existing map, keeping existing entries. Unmarshal then stores
+// key-value pairs from the JSON object into the map. The map's key type must
+// either be any string type, an integer, implement json.Unmarshaler, or
+// implement encoding.TextUnmarshaler.
+//
+// If a JSON value is not appropriate for a given target type,
+// or if a JSON number overflows the target type, Unmarshal
+// skips that field and completes the unmarshaling as best it can.
+// If no more serious errors are encountered, Unmarshal returns
+// an UnmarshalTypeError describing the earliest such error. In any
+// case, it's not guaranteed that all the remaining fields following
+// the problematic one will be unmarshaled into the target object.
+//
+// The JSON null value unmarshals into an interface, map, pointer, or slice
+// by setting that Go value to nil. Because null is often used in JSON to mean
+// “not present,” unmarshaling a JSON null into any other Go type has no effect
+// on the value and produces no error.
+//
+// When unmarshaling quoted strings, invalid UTF-8 or
+// invalid UTF-16 surrogate pairs are not treated as an error.
+// Instead, they are replaced by the Unicode replacement
+// character U+FFFD.
+func Unmarshal(data []byte, v interface{}) error {
+	return unmarshal(data, v)
+}
+
+// UnmarshalContext parses the JSON-encoded data and stores the result in the
+// value pointed to by v, forwarding optFuncs to the decoder. Values that implement
+// the UnmarshalerContext interface have UnmarshalJSON called with ctx as an argument.
+func UnmarshalContext(ctx context.Context, data []byte, v interface{}, optFuncs ...DecodeOptionFunc) error {
+	return unmarshalContext(ctx, data, v, optFuncs...)
+}
+
+func UnmarshalWithOption(data []byte, v interface{}, optFuncs ...DecodeOptionFunc) error {
+	return unmarshal(data, v, optFuncs...)
+}
+
+func UnmarshalNoEscape(data []byte, v interface{}, optFuncs ...DecodeOptionFunc) error {
+	return unmarshalNoEscape(data, v, optFuncs...)
+}
+
+// A Token holds a value of one of these types:
+//
+//	Delim, for the four JSON delimiters [ ] { }
+//	bool, for JSON booleans
+//	float64, for JSON numbers
+//	Number, for JSON numbers
+//	string, for JSON string literals
+//	nil, for JSON null
+type Token = json.Token
+
+// A Number represents a JSON number literal.
+type Number = json.Number
+
+// RawMessage is a raw encoded JSON value.
+// It implements Marshaler and Unmarshaler and can
+// be used to delay JSON decoding or precompute a JSON encoding.
+type RawMessage = json.RawMessage
+
+// A Delim is a JSON array or object delimiter, one of [ ] { or }.
+type Delim = json.Delim
+
+// Compact appends to dst the JSON-encoded src with
+// insignificant space characters elided.
+func Compact(dst *bytes.Buffer, src []byte) error {
+	return encoder.Compact(dst, src, false)
+}
+
+// Indent appends to dst an indented form of the JSON-encoded src.
+// Each element in a JSON object or array begins on a new,
+// indented line beginning with prefix followed by one or more
+// copies of indent according to the indentation nesting.
+// The data appended to dst does not begin with the prefix nor
+// any indentation, to make it easier to embed inside other formatted JSON data.
+// Although leading space characters (space, tab, carriage return, newline)
+// at the beginning of src are dropped, trailing space characters
+// at the end of src are preserved and copied to dst.
+// For example, if src has no trailing spaces, neither will dst;
+// if src ends in a trailing newline, so will dst.
+func Indent(dst *bytes.Buffer, src []byte, prefix, indent string) error {
+	return encoder.Indent(dst, src, prefix, indent)
+}
+
+// HTMLEscape appends to dst the JSON-encoded src with <, >, &, U+2028 and U+2029
+// characters inside string literals changed to \u003c, \u003e, \u0026, \u2028, \u2029
+// so that the JSON will be safe to embed inside HTML <script> tags.
+// For historical reasons, web browsers don't honor standard HTML
+// escaping within <script> tags, so an alternative JSON encoding must
+// be used.
+func HTMLEscape(dst *bytes.Buffer, src []byte) {
+	var v interface{}
+	dec := NewDecoder(bytes.NewBuffer(src))
+	dec.UseNumber()
+	if err := dec.Decode(&v); err != nil {
+		return
+	}
+	buf, _ := marshal(v)
+	dst.Write(buf)
+}
+
+// Valid reports whether data is a valid JSON encoding.
+func Valid(data []byte) bool {
+	var v interface{}
+	decoder := NewDecoder(bytes.NewReader(data))
+	err := decoder.Decode(&v)
+	if err != nil {
+		return false
+	}
+	if !decoder.More() {
+		return true
+	}
+	return decoder.InputOffset() >= int64(len(data))
+}
+
+func init() {
+	encoder.Marshal = Marshal
+	encoder.Unmarshal = Unmarshal
+}
diff --git a/vendor/github.com/goccy/go-json/option.go b/vendor/github.com/goccy/go-json/option.go
new file mode 100644
index 00000000..378031a0
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/option.go
@@ -0,0 +1,79 @@
+package json
+
+import (
+	"io"
+
+	"github.com/goccy/go-json/internal/decoder"
+	"github.com/goccy/go-json/internal/encoder"
+)
+
+type EncodeOption = encoder.Option
+type EncodeOptionFunc func(*EncodeOption)
+
+// UnorderedMap doesn't sort when encoding map type.
+func UnorderedMap() EncodeOptionFunc {
+	return func(opt *EncodeOption) {
+		opt.Flag |= encoder.UnorderedMapOption
+	}
+}
+
+// DisableHTMLEscape disables escaping of HTML characters ( '&', '<', '>' ) when encoding string.
+func DisableHTMLEscape() EncodeOptionFunc {
+	return func(opt *EncodeOption) {
+		opt.Flag &= ^encoder.HTMLEscapeOption
+	}
+}
+
+// DisableNormalizeUTF8
+// By default, when encoding string, UTF8 characters in the range of 0x80 - 0xFF are processed by applying \ufffd for invalid code and escaping for \u2028 and \u2029.
+// This option disables this behaviour. You can expect faster speeds by applying this option, but be careful.
+// encoding/json implements here: https://github.com/golang/go/blob/6178d25fc0b28724b1b5aec2b1b74fc06d9294c7/src/encoding/json/encode.go#L1067-L1093.
+func DisableNormalizeUTF8() EncodeOptionFunc {
+	return func(opt *EncodeOption) {
+		opt.Flag &= ^encoder.NormalizeUTF8Option
+	}
+}
+
+// Debug outputs debug information when panic occurs during encoding.
+func Debug() EncodeOptionFunc {
+	return func(opt *EncodeOption) {
+		opt.Flag |= encoder.DebugOption
+	}
+}
+
+// DebugWith sets the destination to write debug messages.
+func DebugWith(w io.Writer) EncodeOptionFunc {
+	return func(opt *EncodeOption) {
+		opt.DebugOut = w
+	}
+}
+
+// DebugDOT sets the destination to write opcodes graph.
+func DebugDOT(w io.WriteCloser) EncodeOptionFunc {
+	return func(opt *EncodeOption) {
+		opt.DebugDOTOut = w
+	}
+}
+
+// Colorize adds an identifier for coloring to the string of the encoded result.
+func Colorize(scheme *ColorScheme) EncodeOptionFunc {
+	return func(opt *EncodeOption) {
+		opt.Flag |= encoder.ColorizeOption
+		opt.ColorScheme = scheme
+	}
+}
+
+type DecodeOption = decoder.Option
+type DecodeOptionFunc func(*DecodeOption)
+
+// DecodeFieldPriorityFirstWin changes which value is kept for a repeated field.
+// By default go-json, like encoding/json, reflects the result of the last
+// evaluation when a field with the same name exists more than once.
+// This option allows you to change this behavior:
+// it reflects the result of the first evaluation if a field with the same name exists.
+// This behavior has a performance advantage, as it allows the subsequent strings to be skipped once all fields have been evaluated.
+func DecodeFieldPriorityFirstWin() DecodeOptionFunc {
+	return func(opt *DecodeOption) {
+		opt.Flags |= decoder.FirstWinOption
+	}
+}
diff --git a/vendor/github.com/goccy/go-json/path.go b/vendor/github.com/goccy/go-json/path.go
new file mode 100644
index 00000000..38abce78
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/path.go
@@ -0,0 +1,84 @@
+package json
+
+import (
+	"reflect"
+
+	"github.com/goccy/go-json/internal/decoder"
+)
+
+// CreatePath creates JSON Path.
+//
+// JSON Path rule
+// $   : root object or element. The JSON Path format must start with this operator, which refers to the outermost level of the JSON-formatted string.
+// .   : child operator. You can identify child values using dot-notation.
+// ..  : recursive descent.
+// []  : subscript operator. If the JSON object is an array, you can use brackets to specify the array index.
+// [*] : all objects/elements for array.
+//
+// Reserved words must be properly escaped when included in Path.
+//
+// Escape Rule
+// single quote style escape: e.g.) `$['a.b'].c`
+// double quote style escape: e.g.) `$."a.b".c`
+func CreatePath(p string) (*Path, error) {
+	path, err := decoder.PathString(p).Build()
+	if err != nil {
+		return nil, err
+	}
+	return &Path{path: path}, nil
+}
+
+// Path represents JSON Path.
+type Path struct {
+	path *decoder.Path
+}
+
+// RootSelectorOnly reports whether only the root selector ($) is used.
+func (p *Path) RootSelectorOnly() bool {
+	return p.path.RootSelectorOnly
+}
+
+// UsedSingleQuotePathSelector reports whether single quote-based escaping was done when building the JSON Path.
+func (p *Path) UsedSingleQuotePathSelector() bool {
+	return p.path.SingleQuotePathSelector
+}
+
+// UsedDoubleQuotePathSelector reports whether double quote-based escaping was done when building the JSON Path.
+func (p *Path) UsedDoubleQuotePathSelector() bool {
+	return p.path.DoubleQuotePathSelector
+}
+
+// Extract extracts a specific JSON string.
+func (p *Path) Extract(data []byte, optFuncs ...DecodeOptionFunc) ([][]byte, error) {
+	return extractFromPath(p, data, optFuncs...)
+}
+
+// PathString returns original JSON Path string.
+func (p *Path) PathString() string {
+	return p.path.String()
+}
+
+// Unmarshal extracts the parts of data matching the JSON Path, decodes each of them, and assigns the slice of decoded values to v.
+func (p *Path) Unmarshal(data []byte, v interface{}, optFuncs ...DecodeOptionFunc) error {
+	contents, err := extractFromPath(p, data, optFuncs...)
+	if err != nil {
+		return err
+	}
+	results := make([]interface{}, 0, len(contents))
+	for _, content := range contents {
+		var result interface{}
+		if err := Unmarshal(content, &result); err != nil {
+			return err
+		}
+		results = append(results, result)
+	}
+	if err := decoder.AssignValue(reflect.ValueOf(results), reflect.ValueOf(v)); err != nil {
+		return err
+	}
+	return nil
+}
+
+// Get extracts the value of the part corresponding to the JSON Path from src and assigns it to dst.
+func (p *Path) Get(src, dst interface{}) error {
+	return p.path.Get(reflect.ValueOf(src), reflect.ValueOf(dst))
+}
diff --git a/vendor/github.com/goccy/go-json/query.go b/vendor/github.com/goccy/go-json/query.go
new file mode 100644
index 00000000..4b11cf20
--- /dev/null
+++ b/vendor/github.com/goccy/go-json/query.go
@@ -0,0 +1,47 @@
+package json
+
+import (
+	"github.com/goccy/go-json/internal/encoder"
+)
+
+type (
+	// FieldQuery dynamically filters the fields in a structure: create a FieldQuery,
+	// add it to context.Context using SetFieldQueryToContext and then pass that context to MarshalContext.
+	// This is a type-safe operation, so it is faster than filtering using map[string]interface{}.
+	FieldQuery       = encoder.FieldQuery
+	FieldQueryString = encoder.FieldQueryString
+)
+
+var (
+	// FieldQueryFromContext gets the current FieldQuery from context.Context.
+	FieldQueryFromContext = encoder.FieldQueryFromContext
+	// SetFieldQueryToContext sets the current FieldQuery to context.Context.
+	SetFieldQueryToContext = encoder.SetFieldQueryToContext
+)
+
+// BuildFieldQuery builds FieldQuery by fieldName or sub field query.
+// First, specify the field name that you want to keep in structure type.
+// If the field you want to keep is a structure type, by creating a sub field query using BuildSubFieldQuery,
+// you can select the fields you want to keep in the structure.
+// This description can be written recursively.
+func BuildFieldQuery(fields ...FieldQueryString) (*FieldQuery, error) {
+	query, err := Marshal(fields)
+	if err != nil {
+		return nil, err
+	}
+	return FieldQueryString(query).Build()
+}
+
+// BuildSubFieldQuery builds sub field query.
+func BuildSubFieldQuery(name string) *SubFieldQuery {
+	return &SubFieldQuery{name: name}
+}
+
+type SubFieldQuery struct {
+	name string
+}
+
+func (q *SubFieldQuery) Fields(fields ...FieldQueryString) FieldQueryString {
+	query, _ := Marshal(map[string][]FieldQueryString{q.name: fields})
+	return FieldQueryString(query)
+}
diff --git a/vendor/github.com/grpc-ecosystem/grpc-gateway/v2/runtime/convert.go b/vendor/github.com/grpc-ecosystem/grpc-gateway/v2/runtime/convert.go
index d7b15fcf..2e50082a 100644
--- a/vendor/github.com/grpc-ecosystem/grpc-gateway/v2/runtime/convert.go
+++ b/vendor/github.com/grpc-ecosystem/grpc-gateway/v2/runtime/convert.go
@@ -94,7 +94,7 @@ func Int64(val string) (int64, error) {
 }
 
 // Int64Slice converts 'val' where individual integers are separated by
-// 'sep' into a int64 slice.
+// 'sep' into an int64 slice.
 func Int64Slice(val, sep string) ([]int64, error) {
 	s := strings.Split(val, sep)
 	values := make([]int64, len(s))
@@ -118,7 +118,7 @@ func Int32(val string) (int32, error) {
 }
 
 // Int32Slice converts 'val' where individual integers are separated by
-// 'sep' into a int32 slice.
+// 'sep' into an int32 slice.
 func Int32Slice(val, sep string) ([]int32, error) {
 	s := strings.Split(val, sep)
 	values := make([]int32, len(s))
@@ -190,7 +190,7 @@ func Bytes(val string) ([]byte, error) {
 }
 
 // BytesSlice converts 'val' where individual bytes sequences, encoded in URL-safe
-// base64 without padding, are separated by 'sep' into a slice of bytes slices slice.
+// base64 without padding, are separated by 'sep' into a slice of byte slices.
 func BytesSlice(val, sep string) ([][]byte, error) {
 	s := strings.Split(val, sep)
 	values := make([][]byte, len(s))
diff --git a/vendor/github.com/grpc-ecosystem/grpc-gateway/v2/runtime/errors.go b/vendor/github.com/grpc-ecosystem/grpc-gateway/v2/runtime/errors.go
index 01f57341..41cd4f50 100644
--- a/vendor/github.com/grpc-ecosystem/grpc-gateway/v2/runtime/errors.go
+++ b/vendor/github.com/grpc-ecosystem/grpc-gateway/v2/runtime/errors.go
@@ -81,6 +81,21 @@ func HTTPError(ctx context.Context, mux *ServeMux, marshaler Marshaler, w http.R
 	mux.errorHandler(ctx, mux, marshaler, w, r, err)
 }
 
+// HTTPStreamError uses the mux-configured stream error handler to notify error to the client without closing the connection.
+func HTTPStreamError(ctx context.Context, mux *ServeMux, marshaler Marshaler, w http.ResponseWriter, r *http.Request, err error) {
+	st := mux.streamErrorHandler(ctx, err)
+	msg := errorChunk(st)
+	buf, err := marshaler.Marshal(msg)
+	if err != nil {
+		grpclog.Errorf("Failed to marshal an error: %v", err)
+		return
+	}
+	if _, err := w.Write(buf); err != nil {
+		grpclog.Errorf("Failed to notify error to client: %v", err)
+		return
+	}
+}
+
 // DefaultHTTPErrorHandler is the default error handler.
 // If "err" is a gRPC Status, the function replies with the status code mapped by HTTPStatusFromCode.
 // If "err" is a HTTPStatusError, the function replies with the status code provide by that struct. This is
diff --git a/vendor/github.com/grpc-ecosystem/grpc-gateway/v2/runtime/fieldmask.go b/vendor/github.com/grpc-ecosystem/grpc-gateway/v2/runtime/fieldmask.go
index 9005d6a0..2fcd7af3 100644
--- a/vendor/github.com/grpc-ecosystem/grpc-gateway/v2/runtime/fieldmask.go
+++ b/vendor/github.com/grpc-ecosystem/grpc-gateway/v2/runtime/fieldmask.go
@@ -155,7 +155,7 @@ func buildPathsBlindly(name string, in interface{}) []string {
 	return paths
 }
 
-// fieldMaskPathItem stores a in-progress deconstruction of a path for a fieldmask
+// fieldMaskPathItem stores an in-progress deconstruction of a path for a fieldmask
 type fieldMaskPathItem struct {
 	// the list of prior fields leading up to node connected by dots
 	path string
diff --git a/vendor/github.com/grpc-ecosystem/grpc-gateway/v2/runtime/marshaler_registry.go b/vendor/github.com/grpc-ecosystem/grpc-gateway/v2/runtime/marshaler_registry.go
index 0b051e6e..07c28112 100644
--- a/vendor/github.com/grpc-ecosystem/grpc-gateway/v2/runtime/marshaler_registry.go
+++ b/vendor/github.com/grpc-ecosystem/grpc-gateway/v2/runtime/marshaler_registry.go
@@ -86,8 +86,8 @@ func (m marshalerRegistry) add(mime string, marshaler Marshaler) error {
 // It allows for a mapping of case-sensitive Content-Type MIME type string to runtime.Marshaler interfaces.
 //
 // For example, you could allow the client to specify the use of the runtime.JSONPb marshaler
-// with a "application/jsonpb" Content-Type and the use of the runtime.JSONBuiltin marshaler
-// with a "application/json" Content-Type.
+// with an "application/jsonpb" Content-Type and the use of the runtime.JSONBuiltin marshaler
+// with an "application/json" Content-Type.
 // "*" can be used to match any Content-Type.
 // This can be attached to a ServerMux with the marshaler option.
 func makeMarshalerMIMERegistry() marshalerRegistry {
diff --git a/vendor/github.com/grpc-ecosystem/grpc-gateway/v2/runtime/proto2_convert.go b/vendor/github.com/grpc-ecosystem/grpc-gateway/v2/runtime/proto2_convert.go
index d549407f..f710036b 100644
--- a/vendor/github.com/grpc-ecosystem/grpc-gateway/v2/runtime/proto2_convert.go
+++ b/vendor/github.com/grpc-ecosystem/grpc-gateway/v2/runtime/proto2_convert.go
@@ -40,7 +40,7 @@ func Float32P(val string) (*float32, error) {
 }
 
 // Int64P parses the given string representation of an integer
-// and returns a pointer to a int64 whose value is same as the parsed integer.
+// and returns a pointer to an int64 whose value is same as the parsed integer.
 func Int64P(val string) (*int64, error) {
 	i, err := Int64(val)
 	if err != nil {
@@ -50,7 +50,7 @@ func Int64P(val string) (*int64, error) {
 }
 
 // Int32P parses the given string representation of an integer
-// and returns a pointer to a int32 whose value is same as the parsed integer.
+// and returns a pointer to an int32 whose value is same as the parsed integer.
 func Int32P(val string) (*int32, error) {
 	i, err := Int32(val)
 	if err != nil {
diff --git a/vendor/github.com/grpc-ecosystem/grpc-gateway/v2/utilities/pattern.go b/vendor/github.com/grpc-ecosystem/grpc-gateway/v2/utilities/pattern.go
index dfe7de48..38ca39cc 100644
--- a/vendor/github.com/grpc-ecosystem/grpc-gateway/v2/utilities/pattern.go
+++ b/vendor/github.com/grpc-ecosystem/grpc-gateway/v2/utilities/pattern.go
@@ -1,6 +1,6 @@
 package utilities
 
-// An OpCode is a opcode of compiled path patterns.
+// OpCode is an opcode of compiled path patterns.
 type OpCode int
 
 // These constants are the valid values of OpCode.
diff --git a/vendor/github.com/grpc-ecosystem/grpc-gateway/v2/utilities/string_array_flag.go b/vendor/github.com/grpc-ecosystem/grpc-gateway/v2/utilities/string_array_flag.go
index d224ab77..66aa5f2d 100644
--- a/vendor/github.com/grpc-ecosystem/grpc-gateway/v2/utilities/string_array_flag.go
+++ b/vendor/github.com/grpc-ecosystem/grpc-gateway/v2/utilities/string_array_flag.go
@@ -5,7 +5,7 @@ import (
 	"strings"
 )
 
-// flagInterface is an cut down interface to `flag`
+// flagInterface is a cut down interface to `flag`
 type flagInterface interface {
 	Var(value flag.Value, name string, usage string)
 }
diff --git a/vendor/github.com/klauspost/compress/.goreleaser.yml b/vendor/github.com/klauspost/compress/.goreleaser.yml
index a2295380..4528059c 100644
--- a/vendor/github.com/klauspost/compress/.goreleaser.yml
+++ b/vendor/github.com/klauspost/compress/.goreleaser.yml
@@ -1,5 +1,5 @@
-# This is an example goreleaser.yaml file with some sane defaults.
-# Make sure to check the documentation at http://goreleaser.com
+version: 2
+
 before:
   hooks:
     - ./gen.sh
@@ -99,7 +99,7 @@ archives:
 checksum:
   name_template: 'checksums.txt'
 snapshot:
-  name_template: "{{ .Tag }}-next"
+  version_template: "{{ .Tag }}-next"
 changelog:
   sort: asc
   filters:
diff --git a/vendor/github.com/klauspost/compress/README.md b/vendor/github.com/klauspost/compress/README.md
index 05c7359e..de264c85 100644
--- a/vendor/github.com/klauspost/compress/README.md
+++ b/vendor/github.com/klauspost/compress/README.md
@@ -16,6 +16,27 @@ This package provides various compression algorithms.
 
 # changelog
 
+* Sep 23rd, 2024 - [1.17.10](https://github.com/klauspost/compress/releases/tag/v1.17.10)
+	* gzhttp: Add TransportAlwaysDecompress option. https://github.com/klauspost/compress/pull/978
+	* gzhttp: Add supported decompress request body by @mirecl in https://github.com/klauspost/compress/pull/1002
+	* s2: Add EncodeBuffer buffer recycling callback https://github.com/klauspost/compress/pull/982
+	* zstd: Improve memory usage on small streaming encodes https://github.com/klauspost/compress/pull/1007
+	* flate: read data written with partial flush by @vajexal in https://github.com/klauspost/compress/pull/996
+
+* Jun 12th, 2024 - [1.17.9](https://github.com/klauspost/compress/releases/tag/v1.17.9)
+	* s2: Reduce ReadFrom temporary allocations https://github.com/klauspost/compress/pull/949
+	* flate, zstd: Shave some bytes off amd64 matchLen by @greatroar in https://github.com/klauspost/compress/pull/963
+	* Upgrade zip/zlib to 1.22.4 upstream https://github.com/klauspost/compress/pull/970 https://github.com/klauspost/compress/pull/971
+	* zstd: BuildDict fails with RLE table https://github.com/klauspost/compress/pull/951
+
+* Apr 9th, 2024 - [1.17.8](https://github.com/klauspost/compress/releases/tag/v1.17.8)
+	* zstd: Reject blocks where reserved values are not 0 https://github.com/klauspost/compress/pull/885
+	* zstd: Add RLE detection+encoding https://github.com/klauspost/compress/pull/938
+
+* Feb 21st, 2024 - [1.17.7](https://github.com/klauspost/compress/releases/tag/v1.17.7)
+	* s2: Add AsyncFlush method: Complete the block without flushing by @Jille in https://github.com/klauspost/compress/pull/927
+	* s2: Fix literal+repeat exceeds dst crash https://github.com/klauspost/compress/pull/930
+  
 * Feb 5th, 2024 - [1.17.6](https://github.com/klauspost/compress/releases/tag/v1.17.6)
 	* zstd: Fix incorrect repeat coding in best mode https://github.com/klauspost/compress/pull/923
 	* s2: Fix DecodeConcurrent deadlock on errors https://github.com/klauspost/compress/pull/925
@@ -81,7 +102,7 @@ https://github.com/klauspost/compress/pull/919 https://github.com/klauspost/comp
 	* zstd: Various minor improvements by @greatroar in https://github.com/klauspost/compress/pull/788 https://github.com/klauspost/compress/pull/794 https://github.com/klauspost/compress/pull/795
 	* s2: Fix huge block overflow https://github.com/klauspost/compress/pull/779
 	* s2: Allow CustomEncoder fallback https://github.com/klauspost/compress/pull/780
-	* gzhttp: Suppport ResponseWriter Unwrap() in gzhttp handler by @jgimenez in https://github.com/klauspost/compress/pull/799
+	* gzhttp: Support ResponseWriter Unwrap() in gzhttp handler by @jgimenez in https://github.com/klauspost/compress/pull/799
 
 * Mar 13, 2023 - [v1.16.1](https://github.com/klauspost/compress/releases/tag/v1.16.1)
 	* zstd: Speed up + improve best encoder by @greatroar in https://github.com/klauspost/compress/pull/776
@@ -136,7 +157,7 @@ https://github.com/klauspost/compress/pull/919 https://github.com/klauspost/comp
 	* zstd: Add [WithDecodeAllCapLimit](https://pkg.go.dev/github.com/klauspost/compress@v1.15.10/zstd#WithDecodeAllCapLimit) https://github.com/klauspost/compress/pull/649
 	* Add Go 1.19 - deprecate Go 1.16  https://github.com/klauspost/compress/pull/651
 	* flate: Improve level 5+6 compression https://github.com/klauspost/compress/pull/656
-	* zstd: Improve "better" compresssion  https://github.com/klauspost/compress/pull/657
+	* zstd: Improve "better" compression  https://github.com/klauspost/compress/pull/657
 	* s2: Improve "best" compression https://github.com/klauspost/compress/pull/658
 	* s2: Improve "better" compression. https://github.com/klauspost/compress/pull/635
 	* s2: Slightly faster non-assembly decompression https://github.com/klauspost/compress/pull/646
@@ -339,7 +360,7 @@ While the release has been extensively tested, it is recommended to testing when
 	* s2: Fix binaries.
 
 * Feb 25, 2021 (v1.11.8)
-	* s2: Fixed occational out-of-bounds write on amd64. Upgrade recommended.
+	* s2: Fixed occasional out-of-bounds write on amd64. Upgrade recommended.
 	* s2: Add AMD64 assembly for better mode. 25-50% faster. [#315](https://github.com/klauspost/compress/pull/315)
 	* s2: Less upfront decoder allocation. [#322](https://github.com/klauspost/compress/pull/322)
 	* zstd: Faster "compression" of incompressible data. [#314](https://github.com/klauspost/compress/pull/314)
@@ -518,7 +539,7 @@ While the release has been extensively tested, it is recommended to testing when
 * Feb 19, 2016: Faster bit writer, level -2 is 15% faster, level 1 is 4% faster.
 * Feb 19, 2016: Handle small payloads faster in level 1-3.
 * Feb 19, 2016: Added faster level 2 + 3 compression modes.
-* Feb 19, 2016: [Rebalanced compression levels](https://blog.klauspost.com/rebalancing-deflate-compression-levels/), so there is a more even progresssion in terms of compression. New default level is 5.
+* Feb 19, 2016: [Rebalanced compression levels](https://blog.klauspost.com/rebalancing-deflate-compression-levels/), so there is a more even progression in terms of compression. New default level is 5.
 * Feb 14, 2016: Snappy: Merge upstream changes. 
 * Feb 14, 2016: Snappy: Fix aggressive skipping.
 * Feb 14, 2016: Snappy: Update benchmark.
diff --git a/vendor/github.com/klauspost/compress/flate/deflate.go b/vendor/github.com/klauspost/compress/flate/deflate.go
index 66d1657d..af53fb86 100644
--- a/vendor/github.com/klauspost/compress/flate/deflate.go
+++ b/vendor/github.com/klauspost/compress/flate/deflate.go
@@ -861,7 +861,7 @@ func (d *compressor) reset(w io.Writer) {
 	}
 	switch d.compressionLevel.chain {
 	case 0:
-		// level was NoCompression or ConstantCompresssion.
+		// level was NoCompression or ConstantCompression.
 		d.windowEnd = 0
 	default:
 		s := d.state
diff --git a/vendor/github.com/klauspost/compress/flate/inflate.go b/vendor/github.com/klauspost/compress/flate/inflate.go
index 2f410d64..0d7b437f 100644
--- a/vendor/github.com/klauspost/compress/flate/inflate.go
+++ b/vendor/github.com/klauspost/compress/flate/inflate.go
@@ -298,6 +298,14 @@ const (
 	huffmanGenericReader
 )
 
+// flushMode tells the decompressor at which point decoded data is returned to the reader.
+type flushMode uint8
+
+const (
+	syncFlush    flushMode = iota // zero value: return data after a sync flush block
+	partialFlush                  // return data after each finished block
+)
+
 // Decompress state.
 type decompressor struct {
 	// Input source.
@@ -332,6 +340,8 @@ type decompressor struct {
 
 	nb    uint
 	final bool
+
+	flushMode flushMode
 }
 
 func (f *decompressor) nextBlock() {
@@ -618,7 +628,10 @@ func (f *decompressor) dataBlock() {
 	}
 
 	if n == 0 {
-		f.toRead = f.dict.readFlush()
+		if f.flushMode == syncFlush {
+			f.toRead = f.dict.readFlush()
+		}
+
 		f.finishBlock()
 		return
 	}
@@ -657,8 +670,12 @@ func (f *decompressor) finishBlock() {
 		if f.dict.availRead() > 0 {
 			f.toRead = f.dict.readFlush()
 		}
+
 		f.err = io.EOF
+	} else if f.flushMode == partialFlush && f.dict.availRead() > 0 {
+		f.toRead = f.dict.readFlush()
 	}
+
 	f.step = nextBlock
 }
 
@@ -789,15 +806,25 @@ func (f *decompressor) Reset(r io.Reader, dict []byte) error {
 	return nil
 }
 
-// NewReader returns a new ReadCloser that can be used
-// to read the uncompressed version of r.
-// If r does not also implement io.ByteReader,
-// the decompressor may read more data than necessary from r.
-// It is the caller's responsibility to call Close on the ReadCloser
-// when finished reading.
-//
-// The ReadCloser returned by NewReader also implements Resetter.
-func NewReader(r io.Reader) io.ReadCloser {
+// ReaderOpt is an option that NewReaderOpts applies to the decompressor it creates.
+type ReaderOpt func(*decompressor)
+
+// WithPartialBlock makes the reader return data after each block, so partial-flush output can be read.
+func WithPartialBlock() ReaderOpt {
+	return func(f *decompressor) {
+		f.flushMode = partialFlush
+	}
+}
+
+// WithDict returns a ReaderOpt that re-initializes the reader's window with the preset dictionary dict.
+func WithDict(dict []byte) ReaderOpt {
+	return func(f *decompressor) {
+		f.dict.init(maxMatchOffset, dict)
+	}
+}
+
+// NewReaderOpts returns new reader with provided options
+func NewReaderOpts(r io.Reader, opts ...ReaderOpt) io.ReadCloser {
 	fixedHuffmanDecoderInit()
 
 	var f decompressor
@@ -806,9 +833,26 @@ func NewReader(r io.Reader) io.ReadCloser {
 	f.codebits = new([numCodes]int)
 	f.step = nextBlock
 	f.dict.init(maxMatchOffset, nil)
+
+	for _, opt := range opts {
+		opt(&f)
+	}
+
 	return &f
 }
 
+// NewReader returns a new ReadCloser that can be used
+// to read the uncompressed version of r.
+// If r does not also implement io.ByteReader,
+// the decompressor may read more data than necessary from r.
+// It is the caller's responsibility to call Close on the ReadCloser
+// when finished reading. It is NewReaderOpts called with no options.
+//
+// The ReadCloser returned by NewReader also implements Resetter.
+func NewReader(r io.Reader) io.ReadCloser {
+	return NewReaderOpts(r)
+}
+
+
 // NewReaderDict is like NewReader but initializes the reader
 // with a preset dictionary. The returned Reader behaves as if
 // the uncompressed data stream started with the given dictionary,
@@ -817,13 +861,5 @@ func NewReader(r io.Reader) io.ReadCloser {
 //
 // The ReadCloser returned by NewReader also implements Resetter.
 func NewReaderDict(r io.Reader, dict []byte) io.ReadCloser {
-	fixedHuffmanDecoderInit()
-
-	var f decompressor
-	f.r = makeReader(r)
-	f.bits = new([maxNumLit + maxNumDist]int)
-	f.codebits = new([numCodes]int)
-	f.step = nextBlock
-	f.dict.init(maxMatchOffset, dict)
-	return &f
+	return NewReaderOpts(r, WithDict(dict))
 }
diff --git a/vendor/github.com/klauspost/compress/fse/decompress.go b/vendor/github.com/klauspost/compress/fse/decompress.go
index cc05d0f7..0c7dd4ff 100644
--- a/vendor/github.com/klauspost/compress/fse/decompress.go
+++ b/vendor/github.com/klauspost/compress/fse/decompress.go
@@ -15,7 +15,7 @@ const (
 // It is possible, but by no way guaranteed that corrupt data will
 // return an error.
 // It is up to the caller to verify integrity of the returned data.
-// Use a predefined Scrach to set maximum acceptable output size.
+// Use a predefined Scratch to set maximum acceptable output size.
 func Decompress(b []byte, s *Scratch) ([]byte, error) {
 	s, err := s.prepare(b)
 	if err != nil {
diff --git a/vendor/github.com/klauspost/compress/huff0/decompress.go b/vendor/github.com/klauspost/compress/huff0/decompress.go
index 54bd08b2..0f56b02d 100644
--- a/vendor/github.com/klauspost/compress/huff0/decompress.go
+++ b/vendor/github.com/klauspost/compress/huff0/decompress.go
@@ -1136,7 +1136,7 @@ func (s *Scratch) matches(ct cTable, w io.Writer) {
 			errs++
 		}
 		if errs > 0 {
-			fmt.Fprintf(w, "%d errros in base, stopping\n", errs)
+			fmt.Fprintf(w, "%d errors in base, stopping\n", errs)
 			continue
 		}
 		// Ensure that all combinations are covered.
@@ -1152,7 +1152,7 @@ func (s *Scratch) matches(ct cTable, w io.Writer) {
 				errs++
 			}
 			if errs > 20 {
-				fmt.Fprintf(w, "%d errros, stopping\n", errs)
+				fmt.Fprintf(w, "%d errors, stopping\n", errs)
 				break
 			}
 		}
diff --git a/vendor/github.com/klauspost/compress/s2/encode.go b/vendor/github.com/klauspost/compress/s2/encode.go
index 0c9088ad..20b80227 100644
--- a/vendor/github.com/klauspost/compress/s2/encode.go
+++ b/vendor/github.com/klauspost/compress/s2/encode.go
@@ -9,6 +9,9 @@ import (
 	"encoding/binary"
 	"math"
 	"math/bits"
+	"sync"
+
+	"github.com/klauspost/compress/internal/race"
 )
 
 // Encode returns the encoded form of src. The returned slice may be a sub-
@@ -52,6 +55,8 @@ func Encode(dst, src []byte) []byte {
 	return dst[:d]
 }
 
+var estblockPool [2]sync.Pool
+
 // EstimateBlockSize will perform a very fast compression
 // without outputting the result and return the compressed output size.
 // The function returns -1 if no improvement could be achieved.
@@ -61,9 +66,25 @@ func EstimateBlockSize(src []byte) (d int) {
 		return -1
 	}
 	if len(src) <= 1024 {
-		d = calcBlockSizeSmall(src)
+		const sz, pool = 2048, 0
+		tmp, ok := estblockPool[pool].Get().(*[sz]byte)
+		if !ok {
+			tmp = &[sz]byte{}
+		}
+		race.WriteSlice(tmp[:])
+		defer estblockPool[pool].Put(tmp)
+
+		d = calcBlockSizeSmall(src, tmp)
 	} else {
-		d = calcBlockSize(src)
+		const sz, pool = 32768, 1
+		tmp, ok := estblockPool[pool].Get().(*[sz]byte)
+		if !ok {
+			tmp = &[sz]byte{}
+		}
+		race.WriteSlice(tmp[:])
+		defer estblockPool[pool].Put(tmp)
+
+		d = calcBlockSize(src, tmp)
 	}
 
 	if d == 0 {
diff --git a/vendor/github.com/klauspost/compress/s2/encode_amd64.go b/vendor/github.com/klauspost/compress/s2/encode_amd64.go
index 4f45206a..7aadd255 100644
--- a/vendor/github.com/klauspost/compress/s2/encode_amd64.go
+++ b/vendor/github.com/klauspost/compress/s2/encode_amd64.go
@@ -3,10 +3,16 @@
 
 package s2
 
-import "github.com/klauspost/compress/internal/race"
+import (
+	"sync"
+
+	"github.com/klauspost/compress/internal/race"
+)
 
 const hasAmd64Asm = true
 
+var encPools [4]sync.Pool
+
 // encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It
 // assumes that the varint-encoded length of the decompressed bytes has already
 // been written.
@@ -29,23 +35,60 @@ func encodeBlock(dst, src []byte) (d int) {
 	)
 
 	if len(src) >= 4<<20 {
-		return encodeBlockAsm(dst, src)
+		const sz, pool = 65536, 0
+		tmp, ok := encPools[pool].Get().(*[sz]byte)
+		if !ok {
+			tmp = &[sz]byte{}
+		}
+		race.WriteSlice(tmp[:])
+		defer encPools[pool].Put(tmp)
+		return encodeBlockAsm(dst, src, tmp)
 	}
 	if len(src) >= limit12B {
-		return encodeBlockAsm4MB(dst, src)
+		const sz, pool = 65536, 0
+		tmp, ok := encPools[pool].Get().(*[sz]byte)
+		if !ok {
+			tmp = &[sz]byte{}
+		}
+		race.WriteSlice(tmp[:])
+		defer encPools[pool].Put(tmp)
+		return encodeBlockAsm4MB(dst, src, tmp)
 	}
 	if len(src) >= limit10B {
-		return encodeBlockAsm12B(dst, src)
+		const sz, pool = 16384, 1
+		tmp, ok := encPools[pool].Get().(*[sz]byte)
+		if !ok {
+			tmp = &[sz]byte{}
+		}
+		race.WriteSlice(tmp[:])
+		defer encPools[pool].Put(tmp)
+		return encodeBlockAsm12B(dst, src, tmp)
 	}
 	if len(src) >= limit8B {
-		return encodeBlockAsm10B(dst, src)
+		const sz, pool = 4096, 2
+		tmp, ok := encPools[pool].Get().(*[sz]byte)
+		if !ok {
+			tmp = &[sz]byte{}
+		}
+		race.WriteSlice(tmp[:])
+		defer encPools[pool].Put(tmp)
+		return encodeBlockAsm10B(dst, src, tmp)
 	}
 	if len(src) < minNonLiteralBlockSize {
 		return 0
 	}
-	return encodeBlockAsm8B(dst, src)
+	const sz, pool = 1024, 3
+	tmp, ok := encPools[pool].Get().(*[sz]byte)
+	if !ok {
+		tmp = &[sz]byte{}
+	}
+	race.WriteSlice(tmp[:])
+	defer encPools[pool].Put(tmp)
+	return encodeBlockAsm8B(dst, src, tmp)
 }
 
+var encBetterPools [5]sync.Pool
+
 // encodeBlockBetter encodes a non-empty src to a guaranteed-large-enough dst. It
 // assumes that the varint-encoded length of the decompressed bytes has already
 // been written.
@@ -68,21 +111,59 @@ func encodeBlockBetter(dst, src []byte) (d int) {
 	)
 
 	if len(src) > 4<<20 {
-		return encodeBetterBlockAsm(dst, src)
+		const sz, pool = 589824, 0
+		tmp, ok := encBetterPools[pool].Get().(*[sz]byte)
+		if !ok {
+			tmp = &[sz]byte{}
+		}
+		race.WriteSlice(tmp[:])
+		defer encBetterPools[pool].Put(tmp)
+		return encodeBetterBlockAsm(dst, src, tmp)
 	}
 	if len(src) >= limit12B {
-		return encodeBetterBlockAsm4MB(dst, src)
+		const sz, pool = 589824, 0
+		tmp, ok := encBetterPools[pool].Get().(*[sz]byte)
+		if !ok {
+			tmp = &[sz]byte{}
+		}
+		race.WriteSlice(tmp[:])
+		defer encBetterPools[pool].Put(tmp)
+
+		return encodeBetterBlockAsm4MB(dst, src, tmp)
 	}
 	if len(src) >= limit10B {
-		return encodeBetterBlockAsm12B(dst, src)
+		const sz, pool = 81920, 0
+		tmp, ok := encBetterPools[pool].Get().(*[sz]byte)
+		if !ok {
+			tmp = &[sz]byte{}
+		}
+		race.WriteSlice(tmp[:])
+		defer encBetterPools[pool].Put(tmp)
+
+		return encodeBetterBlockAsm12B(dst, src, tmp)
 	}
 	if len(src) >= limit8B {
-		return encodeBetterBlockAsm10B(dst, src)
+		const sz, pool = 20480, 1
+		tmp, ok := encBetterPools[pool].Get().(*[sz]byte)
+		if !ok {
+			tmp = &[sz]byte{}
+		}
+		race.WriteSlice(tmp[:])
+		defer encBetterPools[pool].Put(tmp)
+		return encodeBetterBlockAsm10B(dst, src, tmp)
 	}
 	if len(src) < minNonLiteralBlockSize {
 		return 0
 	}
-	return encodeBetterBlockAsm8B(dst, src)
+
+	const sz, pool = 5120, 2
+	tmp, ok := encBetterPools[pool].Get().(*[sz]byte)
+	if !ok {
+		tmp = &[sz]byte{}
+	}
+	race.WriteSlice(tmp[:])
+	defer encBetterPools[pool].Put(tmp)
+	return encodeBetterBlockAsm8B(dst, src, tmp)
 }
 
 // encodeBlockSnappy encodes a non-empty src to a guaranteed-large-enough dst. It
@@ -105,22 +186,57 @@ func encodeBlockSnappy(dst, src []byte) (d int) {
 		// Use 8 bit table when less than...
 		limit8B = 512
 	)
-	if len(src) >= 64<<10 {
-		return encodeSnappyBlockAsm(dst, src)
+	if len(src) > 65536 {
+		const sz, pool = 65536, 0
+		tmp, ok := encPools[pool].Get().(*[sz]byte)
+		if !ok {
+			tmp = &[sz]byte{}
+		}
+		race.WriteSlice(tmp[:])
+		defer encPools[pool].Put(tmp)
+		return encodeSnappyBlockAsm(dst, src, tmp)
 	}
 	if len(src) >= limit12B {
-		return encodeSnappyBlockAsm64K(dst, src)
+		const sz, pool = 65536, 0
+		tmp, ok := encPools[pool].Get().(*[sz]byte)
+		if !ok {
+			tmp = &[sz]byte{}
+		}
+		race.WriteSlice(tmp[:])
+		defer encPools[pool].Put(tmp)
+		return encodeSnappyBlockAsm64K(dst, src, tmp)
 	}
 	if len(src) >= limit10B {
-		return encodeSnappyBlockAsm12B(dst, src)
+		const sz, pool = 16384, 1
+		tmp, ok := encPools[pool].Get().(*[sz]byte)
+		if !ok {
+			tmp = &[sz]byte{}
+		}
+		race.WriteSlice(tmp[:])
+		defer encPools[pool].Put(tmp)
+		return encodeSnappyBlockAsm12B(dst, src, tmp)
 	}
 	if len(src) >= limit8B {
-		return encodeSnappyBlockAsm10B(dst, src)
+		const sz, pool = 4096, 2
+		tmp, ok := encPools[pool].Get().(*[sz]byte)
+		if !ok {
+			tmp = &[sz]byte{}
+		}
+		race.WriteSlice(tmp[:])
+		defer encPools[pool].Put(tmp)
+		return encodeSnappyBlockAsm10B(dst, src, tmp)
 	}
 	if len(src) < minNonLiteralBlockSize {
 		return 0
 	}
-	return encodeSnappyBlockAsm8B(dst, src)
+	const sz, pool = 1024, 3
+	tmp, ok := encPools[pool].Get().(*[sz]byte)
+	if !ok {
+		tmp = &[sz]byte{}
+	}
+	race.WriteSlice(tmp[:])
+	defer encPools[pool].Put(tmp)
+	return encodeSnappyBlockAsm8B(dst, src, tmp)
 }
 
 // encodeBlockSnappy encodes a non-empty src to a guaranteed-large-enough dst. It
@@ -143,20 +259,59 @@ func encodeBlockBetterSnappy(dst, src []byte) (d int) {
 		// Use 8 bit table when less than...
 		limit8B = 512
 	)
-	if len(src) >= 64<<10 {
-		return encodeSnappyBetterBlockAsm(dst, src)
+	if len(src) > 65536 {
+		const sz, pool = 589824, 0
+		tmp, ok := encBetterPools[pool].Get().(*[sz]byte)
+		if !ok {
+			tmp = &[sz]byte{}
+		}
+		race.WriteSlice(tmp[:])
+		defer encBetterPools[pool].Put(tmp)
+		return encodeSnappyBetterBlockAsm(dst, src, tmp)
 	}
+
 	if len(src) >= limit12B {
-		return encodeSnappyBetterBlockAsm64K(dst, src)
+		const sz, pool = 294912, 4
+		tmp, ok := encBetterPools[pool].Get().(*[sz]byte)
+		if !ok {
+			tmp = &[sz]byte{}
+		}
+		race.WriteSlice(tmp[:])
+		defer encBetterPools[pool].Put(tmp)
+
+		return encodeSnappyBetterBlockAsm64K(dst, src, tmp)
 	}
 	if len(src) >= limit10B {
-		return encodeSnappyBetterBlockAsm12B(dst, src)
+		const sz, pool = 81920, 0
+		tmp, ok := encBetterPools[pool].Get().(*[sz]byte)
+		if !ok {
+			tmp = &[sz]byte{}
+		}
+		race.WriteSlice(tmp[:])
+		defer encBetterPools[pool].Put(tmp)
+
+		return encodeSnappyBetterBlockAsm12B(dst, src, tmp)
 	}
 	if len(src) >= limit8B {
-		return encodeSnappyBetterBlockAsm10B(dst, src)
+		const sz, pool = 20480, 1
+		tmp, ok := encBetterPools[pool].Get().(*[sz]byte)
+		if !ok {
+			tmp = &[sz]byte{}
+		}
+		race.WriteSlice(tmp[:])
+		defer encBetterPools[pool].Put(tmp)
+		return encodeSnappyBetterBlockAsm10B(dst, src, tmp)
 	}
 	if len(src) < minNonLiteralBlockSize {
 		return 0
 	}
-	return encodeSnappyBetterBlockAsm8B(dst, src)
+
+	const sz, pool = 5120, 2
+	tmp, ok := encBetterPools[pool].Get().(*[sz]byte)
+	if !ok {
+		tmp = &[sz]byte{}
+	}
+	race.WriteSlice(tmp[:])
+	defer encBetterPools[pool].Put(tmp)
+	return encodeSnappyBetterBlockAsm8B(dst, src, tmp)
 }
diff --git a/vendor/github.com/klauspost/compress/s2/encode_go.go b/vendor/github.com/klauspost/compress/s2/encode_go.go
index 6b393c34..dd1c973c 100644
--- a/vendor/github.com/klauspost/compress/s2/encode_go.go
+++ b/vendor/github.com/klauspost/compress/s2/encode_go.go
@@ -317,7 +317,7 @@ func matchLen(a []byte, b []byte) int {
 }
 
 // input must be > inputMargin
-func calcBlockSize(src []byte) (d int) {
+func calcBlockSize(src []byte, _ *[32768]byte) (d int) {
 	// Initialize the hash table.
 	const (
 		tableBits    = 13
@@ -503,7 +503,7 @@ emitRemainder:
 }
 
 // length must be > inputMargin.
-func calcBlockSizeSmall(src []byte) (d int) {
+func calcBlockSizeSmall(src []byte, _ *[2048]byte) (d int) {
 	// Initialize the hash table.
 	const (
 		tableBits    = 9
diff --git a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.go b/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.go
index 297e4150..f43aa815 100644
--- a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.go
+++ b/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.go
@@ -11,154 +11,154 @@ func _dummy_()
 // It assumes that the varint-encoded length of the decompressed bytes has already been written.
 //
 //go:noescape
-func encodeBlockAsm(dst []byte, src []byte) int
+func encodeBlockAsm(dst []byte, src []byte, tmp *[65536]byte) int
 
 // encodeBlockAsm4MB encodes a non-empty src to a guaranteed-large-enough dst.
 // Maximum input 4194304 bytes.
 // It assumes that the varint-encoded length of the decompressed bytes has already been written.
 //
 //go:noescape
-func encodeBlockAsm4MB(dst []byte, src []byte) int
+func encodeBlockAsm4MB(dst []byte, src []byte, tmp *[65536]byte) int
 
 // encodeBlockAsm12B encodes a non-empty src to a guaranteed-large-enough dst.
 // Maximum input 16383 bytes.
 // It assumes that the varint-encoded length of the decompressed bytes has already been written.
 //
 //go:noescape
-func encodeBlockAsm12B(dst []byte, src []byte) int
+func encodeBlockAsm12B(dst []byte, src []byte, tmp *[16384]byte) int
 
 // encodeBlockAsm10B encodes a non-empty src to a guaranteed-large-enough dst.
 // Maximum input 4095 bytes.
 // It assumes that the varint-encoded length of the decompressed bytes has already been written.
 //
 //go:noescape
-func encodeBlockAsm10B(dst []byte, src []byte) int
+func encodeBlockAsm10B(dst []byte, src []byte, tmp *[4096]byte) int
 
 // encodeBlockAsm8B encodes a non-empty src to a guaranteed-large-enough dst.
 // Maximum input 511 bytes.
 // It assumes that the varint-encoded length of the decompressed bytes has already been written.
 //
 //go:noescape
-func encodeBlockAsm8B(dst []byte, src []byte) int
+func encodeBlockAsm8B(dst []byte, src []byte, tmp *[1024]byte) int
 
 // encodeBetterBlockAsm encodes a non-empty src to a guaranteed-large-enough dst.
 // Maximum input 4294967295 bytes.
 // It assumes that the varint-encoded length of the decompressed bytes has already been written.
 //
 //go:noescape
-func encodeBetterBlockAsm(dst []byte, src []byte) int
+func encodeBetterBlockAsm(dst []byte, src []byte, tmp *[589824]byte) int
 
 // encodeBetterBlockAsm4MB encodes a non-empty src to a guaranteed-large-enough dst.
 // Maximum input 4194304 bytes.
 // It assumes that the varint-encoded length of the decompressed bytes has already been written.
 //
 //go:noescape
-func encodeBetterBlockAsm4MB(dst []byte, src []byte) int
+func encodeBetterBlockAsm4MB(dst []byte, src []byte, tmp *[589824]byte) int
 
 // encodeBetterBlockAsm12B encodes a non-empty src to a guaranteed-large-enough dst.
 // Maximum input 16383 bytes.
 // It assumes that the varint-encoded length of the decompressed bytes has already been written.
 //
 //go:noescape
-func encodeBetterBlockAsm12B(dst []byte, src []byte) int
+func encodeBetterBlockAsm12B(dst []byte, src []byte, tmp *[81920]byte) int
 
 // encodeBetterBlockAsm10B encodes a non-empty src to a guaranteed-large-enough dst.
 // Maximum input 4095 bytes.
 // It assumes that the varint-encoded length of the decompressed bytes has already been written.
 //
 //go:noescape
-func encodeBetterBlockAsm10B(dst []byte, src []byte) int
+func encodeBetterBlockAsm10B(dst []byte, src []byte, tmp *[20480]byte) int
 
 // encodeBetterBlockAsm8B encodes a non-empty src to a guaranteed-large-enough dst.
 // Maximum input 511 bytes.
 // It assumes that the varint-encoded length of the decompressed bytes has already been written.
 //
 //go:noescape
-func encodeBetterBlockAsm8B(dst []byte, src []byte) int
+func encodeBetterBlockAsm8B(dst []byte, src []byte, tmp *[5120]byte) int
 
 // encodeSnappyBlockAsm encodes a non-empty src to a guaranteed-large-enough dst.
 // Maximum input 4294967295 bytes.
 // It assumes that the varint-encoded length of the decompressed bytes has already been written.
 //
 //go:noescape
-func encodeSnappyBlockAsm(dst []byte, src []byte) int
+func encodeSnappyBlockAsm(dst []byte, src []byte, tmp *[65536]byte) int
 
 // encodeSnappyBlockAsm64K encodes a non-empty src to a guaranteed-large-enough dst.
 // Maximum input 65535 bytes.
 // It assumes that the varint-encoded length of the decompressed bytes has already been written.
 //
 //go:noescape
-func encodeSnappyBlockAsm64K(dst []byte, src []byte) int
+func encodeSnappyBlockAsm64K(dst []byte, src []byte, tmp *[65536]byte) int
 
 // encodeSnappyBlockAsm12B encodes a non-empty src to a guaranteed-large-enough dst.
 // Maximum input 16383 bytes.
 // It assumes that the varint-encoded length of the decompressed bytes has already been written.
 //
 //go:noescape
-func encodeSnappyBlockAsm12B(dst []byte, src []byte) int
+func encodeSnappyBlockAsm12B(dst []byte, src []byte, tmp *[16384]byte) int
 
 // encodeSnappyBlockAsm10B encodes a non-empty src to a guaranteed-large-enough dst.
 // Maximum input 4095 bytes.
 // It assumes that the varint-encoded length of the decompressed bytes has already been written.
 //
 //go:noescape
-func encodeSnappyBlockAsm10B(dst []byte, src []byte) int
+func encodeSnappyBlockAsm10B(dst []byte, src []byte, tmp *[4096]byte) int
 
 // encodeSnappyBlockAsm8B encodes a non-empty src to a guaranteed-large-enough dst.
 // Maximum input 511 bytes.
 // It assumes that the varint-encoded length of the decompressed bytes has already been written.
 //
 //go:noescape
-func encodeSnappyBlockAsm8B(dst []byte, src []byte) int
+func encodeSnappyBlockAsm8B(dst []byte, src []byte, tmp *[1024]byte) int
 
 // encodeSnappyBetterBlockAsm encodes a non-empty src to a guaranteed-large-enough dst.
 // Maximum input 4294967295 bytes.
 // It assumes that the varint-encoded length of the decompressed bytes has already been written.
 //
 //go:noescape
-func encodeSnappyBetterBlockAsm(dst []byte, src []byte) int
+func encodeSnappyBetterBlockAsm(dst []byte, src []byte, tmp *[589824]byte) int
 
 // encodeSnappyBetterBlockAsm64K encodes a non-empty src to a guaranteed-large-enough dst.
 // Maximum input 65535 bytes.
 // It assumes that the varint-encoded length of the decompressed bytes has already been written.
 //
 //go:noescape
-func encodeSnappyBetterBlockAsm64K(dst []byte, src []byte) int
+func encodeSnappyBetterBlockAsm64K(dst []byte, src []byte, tmp *[294912]byte) int
 
 // encodeSnappyBetterBlockAsm12B encodes a non-empty src to a guaranteed-large-enough dst.
 // Maximum input 16383 bytes.
 // It assumes that the varint-encoded length of the decompressed bytes has already been written.
 //
 //go:noescape
-func encodeSnappyBetterBlockAsm12B(dst []byte, src []byte) int
+func encodeSnappyBetterBlockAsm12B(dst []byte, src []byte, tmp *[81920]byte) int
 
 // encodeSnappyBetterBlockAsm10B encodes a non-empty src to a guaranteed-large-enough dst.
 // Maximum input 4095 bytes.
 // It assumes that the varint-encoded length of the decompressed bytes has already been written.
 //
 //go:noescape
-func encodeSnappyBetterBlockAsm10B(dst []byte, src []byte) int
+func encodeSnappyBetterBlockAsm10B(dst []byte, src []byte, tmp *[20480]byte) int
 
 // encodeSnappyBetterBlockAsm8B encodes a non-empty src to a guaranteed-large-enough dst.
 // Maximum input 511 bytes.
 // It assumes that the varint-encoded length of the decompressed bytes has already been written.
 //
 //go:noescape
-func encodeSnappyBetterBlockAsm8B(dst []byte, src []byte) int
+func encodeSnappyBetterBlockAsm8B(dst []byte, src []byte, tmp *[5120]byte) int
 
 // calcBlockSize encodes a non-empty src to a guaranteed-large-enough dst.
 // Maximum input 4294967295 bytes.
 // It assumes that the varint-encoded length of the decompressed bytes has already been written.
 //
 //go:noescape
-func calcBlockSize(src []byte) int
+func calcBlockSize(src []byte, tmp *[32768]byte) int
 
 // calcBlockSizeSmall encodes a non-empty src to a guaranteed-large-enough dst.
 // Maximum input 1024 bytes.
 // It assumes that the varint-encoded length of the decompressed bytes has already been written.
 //
 //go:noescape
-func calcBlockSizeSmall(src []byte) int
+func calcBlockSizeSmall(src []byte, tmp *[2048]byte) int
 
 // emitLiteral writes a literal chunk and returns the number of bytes written.
 //
diff --git a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s b/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s
index 2ff5b334..df9be687 100644
--- a/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s
+++ b/vendor/github.com/klauspost/compress/s2/encodeblock_amd64.s
@@ -13,1270 +13,1271 @@ TEXT ·_dummy_(SB), $0
 #endif
 	RET
 
-// func encodeBlockAsm(dst []byte, src []byte) int
+// func encodeBlockAsm(dst []byte, src []byte, tmp *[65536]byte) int
 // Requires: BMI, SSE2
-TEXT ·encodeBlockAsm(SB), $65560-56
-	MOVQ dst_base+0(FP), AX
-	MOVQ $0x00000200, CX
-	LEAQ 24(SP), DX
+TEXT ·encodeBlockAsm(SB), $24-64
+	MOVQ tmp+48(FP), AX
+	MOVQ dst_base+0(FP), CX
+	MOVQ $0x00000200, DX
+	MOVQ AX, BX
 	PXOR X0, X0
 
 zero_loop_encodeBlockAsm:
-	MOVOU X0, (DX)
-	MOVOU X0, 16(DX)
-	MOVOU X0, 32(DX)
-	MOVOU X0, 48(DX)
-	MOVOU X0, 64(DX)
-	MOVOU X0, 80(DX)
-	MOVOU X0, 96(DX)
-	MOVOU X0, 112(DX)
-	ADDQ  $0x80, DX
-	DECQ  CX
+	MOVOU X0, (BX)
+	MOVOU X0, 16(BX)
+	MOVOU X0, 32(BX)
+	MOVOU X0, 48(BX)
+	MOVOU X0, 64(BX)
+	MOVOU X0, 80(BX)
+	MOVOU X0, 96(BX)
+	MOVOU X0, 112(BX)
+	ADDQ  $0x80, BX
+	DECQ  DX
 	JNZ   zero_loop_encodeBlockAsm
 	MOVL  $0x00000000, 12(SP)
-	MOVQ  src_len+32(FP), CX
-	LEAQ  -9(CX), DX
-	LEAQ  -8(CX), BX
-	MOVL  BX, 8(SP)
-	SHRQ  $0x05, CX
-	SUBL  CX, DX
-	LEAQ  (AX)(DX*1), DX
-	MOVQ  DX, (SP)
-	MOVL  $0x00000001, CX
-	MOVL  CX, 16(SP)
-	MOVQ  src_base+24(FP), DX
+	MOVQ  src_len+32(FP), DX
+	LEAQ  -9(DX), BX
+	LEAQ  -8(DX), SI
+	MOVL  SI, 8(SP)
+	SHRQ  $0x05, DX
+	SUBL  DX, BX
+	LEAQ  (CX)(BX*1), BX
+	MOVQ  BX, (SP)
+	MOVL  $0x00000001, DX
+	MOVL  DX, 16(SP)
+	MOVQ  src_base+24(FP), BX
 
 search_loop_encodeBlockAsm:
-	MOVL  CX, BX
-	SUBL  12(SP), BX
-	SHRL  $0x06, BX
-	LEAL  4(CX)(BX*1), BX
-	CMPL  BX, 8(SP)
+	MOVL  DX, SI
+	SUBL  12(SP), SI
+	SHRL  $0x06, SI
+	LEAL  4(DX)(SI*1), SI
+	CMPL  SI, 8(SP)
 	JAE   emit_remainder_encodeBlockAsm
-	MOVQ  (DX)(CX*1), SI
-	MOVL  BX, 20(SP)
-	MOVQ  $0x0000cf1bbcdcbf9b, R8
-	MOVQ  SI, R9
-	MOVQ  SI, R10
-	SHRQ  $0x08, R10
-	SHLQ  $0x10, R9
-	IMULQ R8, R9
-	SHRQ  $0x32, R9
+	MOVQ  (BX)(DX*1), DI
+	MOVL  SI, 20(SP)
+	MOVQ  $0x0000cf1bbcdcbf9b, R9
+	MOVQ  DI, R10
+	MOVQ  DI, R11
+	SHRQ  $0x08, R11
 	SHLQ  $0x10, R10
-	IMULQ R8, R10
+	IMULQ R9, R10
 	SHRQ  $0x32, R10
-	MOVL  24(SP)(R9*4), BX
-	MOVL  24(SP)(R10*4), DI
-	MOVL  CX, 24(SP)(R9*4)
-	LEAL  1(CX), R9
-	MOVL  R9, 24(SP)(R10*4)
-	MOVQ  SI, R9
-	SHRQ  $0x10, R9
-	SHLQ  $0x10, R9
-	IMULQ R8, R9
-	SHRQ  $0x32, R9
-	MOVL  CX, R8
-	SUBL  16(SP), R8
-	MOVL  1(DX)(R8*1), R10
-	MOVQ  SI, R8
-	SHRQ  $0x08, R8
-	CMPL  R8, R10
+	SHLQ  $0x10, R11
+	IMULQ R9, R11
+	SHRQ  $0x32, R11
+	MOVL  (AX)(R10*4), SI
+	MOVL  (AX)(R11*4), R8
+	MOVL  DX, (AX)(R10*4)
+	LEAL  1(DX), R10
+	MOVL  R10, (AX)(R11*4)
+	MOVQ  DI, R10
+	SHRQ  $0x10, R10
+	SHLQ  $0x10, R10
+	IMULQ R9, R10
+	SHRQ  $0x32, R10
+	MOVL  DX, R9
+	SUBL  16(SP), R9
+	MOVL  1(BX)(R9*1), R11
+	MOVQ  DI, R9
+	SHRQ  $0x08, R9
+	CMPL  R9, R11
 	JNE   no_repeat_found_encodeBlockAsm
-	LEAL  1(CX), SI
-	MOVL  12(SP), DI
-	MOVL  SI, BX
-	SUBL  16(SP), BX
+	LEAL  1(DX), DI
+	MOVL  12(SP), R8
+	MOVL  DI, SI
+	SUBL  16(SP), SI
 	JZ    repeat_extend_back_end_encodeBlockAsm
 
 repeat_extend_back_loop_encodeBlockAsm:
-	CMPL SI, DI
+	CMPL DI, R8
 	JBE  repeat_extend_back_end_encodeBlockAsm
-	MOVB -1(DX)(BX*1), R8
-	MOVB -1(DX)(SI*1), R9
-	CMPB R8, R9
+	MOVB -1(BX)(SI*1), R9
+	MOVB -1(BX)(DI*1), R10
+	CMPB R9, R10
 	JNE  repeat_extend_back_end_encodeBlockAsm
-	LEAL -1(SI), SI
-	DECL BX
+	LEAL -1(DI), DI
+	DECL SI
 	JNZ  repeat_extend_back_loop_encodeBlockAsm
 
 repeat_extend_back_end_encodeBlockAsm:
-	MOVL SI, BX
-	SUBL 12(SP), BX
-	LEAQ 5(AX)(BX*1), BX
-	CMPQ BX, (SP)
+	MOVL DI, SI
+	SUBL 12(SP), SI
+	LEAQ 5(CX)(SI*1), SI
+	CMPQ SI, (SP)
 	JB   repeat_dst_size_check_encodeBlockAsm
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 repeat_dst_size_check_encodeBlockAsm:
-	MOVL 12(SP), BX
-	CMPL BX, SI
+	MOVL 12(SP), SI
+	CMPL SI, DI
 	JEQ  emit_literal_done_repeat_emit_encodeBlockAsm
-	MOVL SI, R8
-	MOVL SI, 12(SP)
-	LEAQ (DX)(BX*1), R9
-	SUBL BX, R8
-	LEAL -1(R8), BX
-	CMPL BX, $0x3c
+	MOVL DI, R9
+	MOVL DI, 12(SP)
+	LEAQ (BX)(SI*1), R10
+	SUBL SI, R9
+	LEAL -1(R9), SI
+	CMPL SI, $0x3c
 	JB   one_byte_repeat_emit_encodeBlockAsm
-	CMPL BX, $0x00000100
+	CMPL SI, $0x00000100
 	JB   two_bytes_repeat_emit_encodeBlockAsm
-	CMPL BX, $0x00010000
+	CMPL SI, $0x00010000
 	JB   three_bytes_repeat_emit_encodeBlockAsm
-	CMPL BX, $0x01000000
+	CMPL SI, $0x01000000
 	JB   four_bytes_repeat_emit_encodeBlockAsm
-	MOVB $0xfc, (AX)
-	MOVL BX, 1(AX)
-	ADDQ $0x05, AX
+	MOVB $0xfc, (CX)
+	MOVL SI, 1(CX)
+	ADDQ $0x05, CX
 	JMP  memmove_long_repeat_emit_encodeBlockAsm
 
 four_bytes_repeat_emit_encodeBlockAsm:
-	MOVL BX, R10
-	SHRL $0x10, R10
-	MOVB $0xf8, (AX)
-	MOVW BX, 1(AX)
-	MOVB R10, 3(AX)
-	ADDQ $0x04, AX
+	MOVL SI, R11
+	SHRL $0x10, R11
+	MOVB $0xf8, (CX)
+	MOVW SI, 1(CX)
+	MOVB R11, 3(CX)
+	ADDQ $0x04, CX
 	JMP  memmove_long_repeat_emit_encodeBlockAsm
 
 three_bytes_repeat_emit_encodeBlockAsm:
-	MOVB $0xf4, (AX)
-	MOVW BX, 1(AX)
-	ADDQ $0x03, AX
+	MOVB $0xf4, (CX)
+	MOVW SI, 1(CX)
+	ADDQ $0x03, CX
 	JMP  memmove_long_repeat_emit_encodeBlockAsm
 
 two_bytes_repeat_emit_encodeBlockAsm:
-	MOVB $0xf0, (AX)
-	MOVB BL, 1(AX)
-	ADDQ $0x02, AX
-	CMPL BX, $0x40
+	MOVB $0xf0, (CX)
+	MOVB SI, 1(CX)
+	ADDQ $0x02, CX
+	CMPL SI, $0x40
 	JB   memmove_repeat_emit_encodeBlockAsm
 	JMP  memmove_long_repeat_emit_encodeBlockAsm
 
 one_byte_repeat_emit_encodeBlockAsm:
-	SHLB $0x02, BL
-	MOVB BL, (AX)
-	ADDQ $0x01, AX
+	SHLB $0x02, SI
+	MOVB SI, (CX)
+	ADDQ $0x01, CX
 
 memmove_repeat_emit_encodeBlockAsm:
-	LEAQ (AX)(R8*1), BX
+	LEAQ (CX)(R9*1), SI
 
 	// genMemMoveShort
-	CMPQ R8, $0x08
+	CMPQ R9, $0x08
 	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8
-	CMPQ R8, $0x10
+	CMPQ R9, $0x10
 	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8through16
-	CMPQ R8, $0x20
+	CMPQ R9, $0x20
 	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_17through32
 	JMP  emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_33through64
 
 emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8:
-	MOVQ (R9), R10
-	MOVQ R10, (AX)
+	MOVQ (R10), R11
+	MOVQ R11, (CX)
 	JMP  memmove_end_copy_repeat_emit_encodeBlockAsm
 
 emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_8through16:
-	MOVQ (R9), R10
-	MOVQ -8(R9)(R8*1), R9
-	MOVQ R10, (AX)
-	MOVQ R9, -8(AX)(R8*1)
+	MOVQ (R10), R11
+	MOVQ -8(R10)(R9*1), R10
+	MOVQ R11, (CX)
+	MOVQ R10, -8(CX)(R9*1)
 	JMP  memmove_end_copy_repeat_emit_encodeBlockAsm
 
 emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_17through32:
-	MOVOU (R9), X0
-	MOVOU -16(R9)(R8*1), X1
-	MOVOU X0, (AX)
-	MOVOU X1, -16(AX)(R8*1)
+	MOVOU (R10), X0
+	MOVOU -16(R10)(R9*1), X1
+	MOVOU X0, (CX)
+	MOVOU X1, -16(CX)(R9*1)
 	JMP   memmove_end_copy_repeat_emit_encodeBlockAsm
 
 emit_lit_memmove_repeat_emit_encodeBlockAsm_memmove_move_33through64:
-	MOVOU (R9), X0
-	MOVOU 16(R9), X1
-	MOVOU -32(R9)(R8*1), X2
-	MOVOU -16(R9)(R8*1), X3
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(R8*1)
-	MOVOU X3, -16(AX)(R8*1)
+	MOVOU (R10), X0
+	MOVOU 16(R10), X1
+	MOVOU -32(R10)(R9*1), X2
+	MOVOU -16(R10)(R9*1), X3
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R9*1)
+	MOVOU X3, -16(CX)(R9*1)
 
 memmove_end_copy_repeat_emit_encodeBlockAsm:
-	MOVQ BX, AX
+	MOVQ SI, CX
 	JMP  emit_literal_done_repeat_emit_encodeBlockAsm
 
 memmove_long_repeat_emit_encodeBlockAsm:
-	LEAQ (AX)(R8*1), BX
+	LEAQ (CX)(R9*1), SI
 
 	// genMemMoveLong
-	MOVOU (R9), X0
-	MOVOU 16(R9), X1
-	MOVOU -32(R9)(R8*1), X2
-	MOVOU -16(R9)(R8*1), X3
-	MOVQ  R8, R11
-	SHRQ  $0x05, R11
-	MOVQ  AX, R10
-	ANDL  $0x0000001f, R10
-	MOVQ  $0x00000040, R12
-	SUBQ  R10, R12
-	DECQ  R11
+	MOVOU (R10), X0
+	MOVOU 16(R10), X1
+	MOVOU -32(R10)(R9*1), X2
+	MOVOU -16(R10)(R9*1), X3
+	MOVQ  R9, R12
+	SHRQ  $0x05, R12
+	MOVQ  CX, R11
+	ANDL  $0x0000001f, R11
+	MOVQ  $0x00000040, R13
+	SUBQ  R11, R13
+	DECQ  R12
 	JA    emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32
-	LEAQ  -32(R9)(R12*1), R10
-	LEAQ  -32(AX)(R12*1), R13
+	LEAQ  -32(R10)(R13*1), R11
+	LEAQ  -32(CX)(R13*1), R14
 
 emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_big_loop_back:
-	MOVOU (R10), X4
-	MOVOU 16(R10), X5
-	MOVOA X4, (R13)
-	MOVOA X5, 16(R13)
+	MOVOU (R11), X4
+	MOVOU 16(R11), X5
+	MOVOA X4, (R14)
+	MOVOA X5, 16(R14)
+	ADDQ  $0x20, R14
+	ADDQ  $0x20, R11
 	ADDQ  $0x20, R13
-	ADDQ  $0x20, R10
-	ADDQ  $0x20, R12
-	DECQ  R11
+	DECQ  R12
 	JNA   emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_big_loop_back
 
 emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32:
-	MOVOU -32(R9)(R12*1), X4
-	MOVOU -16(R9)(R12*1), X5
-	MOVOA X4, -32(AX)(R12*1)
-	MOVOA X5, -16(AX)(R12*1)
-	ADDQ  $0x20, R12
-	CMPQ  R8, R12
+	MOVOU -32(R10)(R13*1), X4
+	MOVOU -16(R10)(R13*1), X5
+	MOVOA X4, -32(CX)(R13*1)
+	MOVOA X5, -16(CX)(R13*1)
+	ADDQ  $0x20, R13
+	CMPQ  R9, R13
 	JAE   emit_lit_memmove_long_repeat_emit_encodeBlockAsmlarge_forward_sse_loop_32
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(R8*1)
-	MOVOU X3, -16(AX)(R8*1)
-	MOVQ  BX, AX
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R9*1)
+	MOVOU X3, -16(CX)(R9*1)
+	MOVQ  SI, CX
 
 emit_literal_done_repeat_emit_encodeBlockAsm:
-	ADDL $0x05, CX
-	MOVL CX, BX
-	SUBL 16(SP), BX
-	MOVQ src_len+32(FP), R8
-	SUBL CX, R8
-	LEAQ (DX)(CX*1), R9
-	LEAQ (DX)(BX*1), BX
+	ADDL $0x05, DX
+	MOVL DX, SI
+	SUBL 16(SP), SI
+	MOVQ src_len+32(FP), R9
+	SUBL DX, R9
+	LEAQ (BX)(DX*1), R10
+	LEAQ (BX)(SI*1), SI
 
 	// matchLen
-	XORL R11, R11
+	XORL R12, R12
 
 matchlen_loopback_16_repeat_extend_encodeBlockAsm:
-	CMPL R8, $0x10
+	CMPL R9, $0x10
 	JB   matchlen_match8_repeat_extend_encodeBlockAsm
-	MOVQ (R9)(R11*1), R10
-	MOVQ 8(R9)(R11*1), R12
-	XORQ (BX)(R11*1), R10
+	MOVQ (R10)(R12*1), R11
+	MOVQ 8(R10)(R12*1), R13
+	XORQ (SI)(R12*1), R11
 	JNZ  matchlen_bsf_8_repeat_extend_encodeBlockAsm
-	XORQ 8(BX)(R11*1), R12
+	XORQ 8(SI)(R12*1), R13
 	JNZ  matchlen_bsf_16repeat_extend_encodeBlockAsm
-	LEAL -16(R8), R8
-	LEAL 16(R11), R11
+	LEAL -16(R9), R9
+	LEAL 16(R12), R12
 	JMP  matchlen_loopback_16_repeat_extend_encodeBlockAsm
 
 matchlen_bsf_16repeat_extend_encodeBlockAsm:
 #ifdef GOAMD64_v3
-	TZCNTQ R12, R12
+	TZCNTQ R13, R13
 
 #else
-	BSFQ R12, R12
+	BSFQ R13, R13
 
 #endif
-	SARQ $0x03, R12
-	LEAL 8(R11)(R12*1), R11
+	SARQ $0x03, R13
+	LEAL 8(R12)(R13*1), R12
 	JMP  repeat_extend_forward_end_encodeBlockAsm
 
 matchlen_match8_repeat_extend_encodeBlockAsm:
-	CMPL R8, $0x08
+	CMPL R9, $0x08
 	JB   matchlen_match4_repeat_extend_encodeBlockAsm
-	MOVQ (R9)(R11*1), R10
-	XORQ (BX)(R11*1), R10
+	MOVQ (R10)(R12*1), R11
+	XORQ (SI)(R12*1), R11
 	JNZ  matchlen_bsf_8_repeat_extend_encodeBlockAsm
-	LEAL -8(R8), R8
-	LEAL 8(R11), R11
+	LEAL -8(R9), R9
+	LEAL 8(R12), R12
 	JMP  matchlen_match4_repeat_extend_encodeBlockAsm
 
 matchlen_bsf_8_repeat_extend_encodeBlockAsm:
 #ifdef GOAMD64_v3
-	TZCNTQ R10, R10
+	TZCNTQ R11, R11
 
 #else
-	BSFQ R10, R10
+	BSFQ R11, R11
 
 #endif
-	SARQ $0x03, R10
-	LEAL (R11)(R10*1), R11
+	SARQ $0x03, R11
+	LEAL (R12)(R11*1), R12
 	JMP  repeat_extend_forward_end_encodeBlockAsm
 
 matchlen_match4_repeat_extend_encodeBlockAsm:
-	CMPL R8, $0x04
+	CMPL R9, $0x04
 	JB   matchlen_match2_repeat_extend_encodeBlockAsm
-	MOVL (R9)(R11*1), R10
-	CMPL (BX)(R11*1), R10
+	MOVL (R10)(R12*1), R11
+	CMPL (SI)(R12*1), R11
 	JNE  matchlen_match2_repeat_extend_encodeBlockAsm
-	LEAL -4(R8), R8
-	LEAL 4(R11), R11
+	LEAL -4(R9), R9
+	LEAL 4(R12), R12
 
 matchlen_match2_repeat_extend_encodeBlockAsm:
-	CMPL R8, $0x01
+	CMPL R9, $0x01
 	JE   matchlen_match1_repeat_extend_encodeBlockAsm
 	JB   repeat_extend_forward_end_encodeBlockAsm
-	MOVW (R9)(R11*1), R10
-	CMPW (BX)(R11*1), R10
+	MOVW (R10)(R12*1), R11
+	CMPW (SI)(R12*1), R11
 	JNE  matchlen_match1_repeat_extend_encodeBlockAsm
-	LEAL 2(R11), R11
-	SUBL $0x02, R8
+	LEAL 2(R12), R12
+	SUBL $0x02, R9
 	JZ   repeat_extend_forward_end_encodeBlockAsm
 
 matchlen_match1_repeat_extend_encodeBlockAsm:
-	MOVB (R9)(R11*1), R10
-	CMPB (BX)(R11*1), R10
+	MOVB (R10)(R12*1), R11
+	CMPB (SI)(R12*1), R11
 	JNE  repeat_extend_forward_end_encodeBlockAsm
-	LEAL 1(R11), R11
+	LEAL 1(R12), R12
 
 repeat_extend_forward_end_encodeBlockAsm:
-	ADDL  R11, CX
-	MOVL  CX, BX
-	SUBL  SI, BX
-	MOVL  16(SP), SI
-	TESTL DI, DI
+	ADDL  R12, DX
+	MOVL  DX, SI
+	SUBL  DI, SI
+	MOVL  16(SP), DI
+	TESTL R8, R8
 	JZ    repeat_as_copy_encodeBlockAsm
 
 	// emitRepeat
 emit_repeat_again_match_repeat_encodeBlockAsm:
-	MOVL BX, DI
-	LEAL -4(BX), BX
-	CMPL DI, $0x08
+	MOVL SI, R8
+	LEAL -4(SI), SI
+	CMPL R8, $0x08
 	JBE  repeat_two_match_repeat_encodeBlockAsm
-	CMPL DI, $0x0c
+	CMPL R8, $0x0c
 	JAE  cant_repeat_two_offset_match_repeat_encodeBlockAsm
-	CMPL SI, $0x00000800
+	CMPL DI, $0x00000800
 	JB   repeat_two_offset_match_repeat_encodeBlockAsm
 
 cant_repeat_two_offset_match_repeat_encodeBlockAsm:
-	CMPL BX, $0x00000104
+	CMPL SI, $0x00000104
 	JB   repeat_three_match_repeat_encodeBlockAsm
-	CMPL BX, $0x00010100
+	CMPL SI, $0x00010100
 	JB   repeat_four_match_repeat_encodeBlockAsm
-	CMPL BX, $0x0100ffff
+	CMPL SI, $0x0100ffff
 	JB   repeat_five_match_repeat_encodeBlockAsm
-	LEAL -16842747(BX), BX
-	MOVL $0xfffb001d, (AX)
-	MOVB $0xff, 4(AX)
-	ADDQ $0x05, AX
+	LEAL -16842747(SI), SI
+	MOVL $0xfffb001d, (CX)
+	MOVB $0xff, 4(CX)
+	ADDQ $0x05, CX
 	JMP  emit_repeat_again_match_repeat_encodeBlockAsm
 
 repeat_five_match_repeat_encodeBlockAsm:
-	LEAL -65536(BX), BX
-	MOVL BX, SI
-	MOVW $0x001d, (AX)
-	MOVW BX, 2(AX)
-	SARL $0x10, SI
-	MOVB SI, 4(AX)
-	ADDQ $0x05, AX
+	LEAL -65536(SI), SI
+	MOVL SI, DI
+	MOVW $0x001d, (CX)
+	MOVW SI, 2(CX)
+	SARL $0x10, DI
+	MOVB DI, 4(CX)
+	ADDQ $0x05, CX
 	JMP  repeat_end_emit_encodeBlockAsm
 
 repeat_four_match_repeat_encodeBlockAsm:
-	LEAL -256(BX), BX
-	MOVW $0x0019, (AX)
-	MOVW BX, 2(AX)
-	ADDQ $0x04, AX
+	LEAL -256(SI), SI
+	MOVW $0x0019, (CX)
+	MOVW SI, 2(CX)
+	ADDQ $0x04, CX
 	JMP  repeat_end_emit_encodeBlockAsm
 
 repeat_three_match_repeat_encodeBlockAsm:
-	LEAL -4(BX), BX
-	MOVW $0x0015, (AX)
-	MOVB BL, 2(AX)
-	ADDQ $0x03, AX
+	LEAL -4(SI), SI
+	MOVW $0x0015, (CX)
+	MOVB SI, 2(CX)
+	ADDQ $0x03, CX
 	JMP  repeat_end_emit_encodeBlockAsm
 
 repeat_two_match_repeat_encodeBlockAsm:
-	SHLL $0x02, BX
-	ORL  $0x01, BX
-	MOVW BX, (AX)
-	ADDQ $0x02, AX
+	SHLL $0x02, SI
+	ORL  $0x01, SI
+	MOVW SI, (CX)
+	ADDQ $0x02, CX
 	JMP  repeat_end_emit_encodeBlockAsm
 
 repeat_two_offset_match_repeat_encodeBlockAsm:
-	XORQ DI, DI
-	LEAL 1(DI)(BX*4), BX
-	MOVB SI, 1(AX)
-	SARL $0x08, SI
-	SHLL $0x05, SI
-	ORL  SI, BX
-	MOVB BL, (AX)
-	ADDQ $0x02, AX
+	XORQ R8, R8
+	LEAL 1(R8)(SI*4), SI
+	MOVB DI, 1(CX)
+	SARL $0x08, DI
+	SHLL $0x05, DI
+	ORL  DI, SI
+	MOVB SI, (CX)
+	ADDQ $0x02, CX
 	JMP  repeat_end_emit_encodeBlockAsm
 
 repeat_as_copy_encodeBlockAsm:
 	// emitCopy
-	CMPL SI, $0x00010000
+	CMPL DI, $0x00010000
 	JB   two_byte_offset_repeat_as_copy_encodeBlockAsm
-	CMPL BX, $0x40
+	CMPL SI, $0x40
 	JBE  four_bytes_remain_repeat_as_copy_encodeBlockAsm
-	MOVB $0xff, (AX)
-	MOVL SI, 1(AX)
-	LEAL -64(BX), BX
-	ADDQ $0x05, AX
-	CMPL BX, $0x04
+	MOVB $0xff, (CX)
+	MOVL DI, 1(CX)
+	LEAL -64(SI), SI
+	ADDQ $0x05, CX
+	CMPL SI, $0x04
 	JB   four_bytes_remain_repeat_as_copy_encodeBlockAsm
 
 	// emitRepeat
 emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy:
-	MOVL BX, DI
-	LEAL -4(BX), BX
-	CMPL DI, $0x08
+	MOVL SI, R8
+	LEAL -4(SI), SI
+	CMPL R8, $0x08
 	JBE  repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy
-	CMPL DI, $0x0c
+	CMPL R8, $0x0c
 	JAE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy
-	CMPL SI, $0x00000800
+	CMPL DI, $0x00000800
 	JB   repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy
 
 cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy:
-	CMPL BX, $0x00000104
+	CMPL SI, $0x00000104
 	JB   repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy
-	CMPL BX, $0x00010100
+	CMPL SI, $0x00010100
 	JB   repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy
-	CMPL BX, $0x0100ffff
+	CMPL SI, $0x0100ffff
 	JB   repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy
-	LEAL -16842747(BX), BX
-	MOVL $0xfffb001d, (AX)
-	MOVB $0xff, 4(AX)
-	ADDQ $0x05, AX
+	LEAL -16842747(SI), SI
+	MOVL $0xfffb001d, (CX)
+	MOVB $0xff, 4(CX)
+	ADDQ $0x05, CX
 	JMP  emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy
 
 repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy:
-	LEAL -65536(BX), BX
-	MOVL BX, SI
-	MOVW $0x001d, (AX)
-	MOVW BX, 2(AX)
-	SARL $0x10, SI
-	MOVB SI, 4(AX)
-	ADDQ $0x05, AX
+	LEAL -65536(SI), SI
+	MOVL SI, DI
+	MOVW $0x001d, (CX)
+	MOVW SI, 2(CX)
+	SARL $0x10, DI
+	MOVB DI, 4(CX)
+	ADDQ $0x05, CX
 	JMP  repeat_end_emit_encodeBlockAsm
 
 repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy:
-	LEAL -256(BX), BX
-	MOVW $0x0019, (AX)
-	MOVW BX, 2(AX)
-	ADDQ $0x04, AX
+	LEAL -256(SI), SI
+	MOVW $0x0019, (CX)
+	MOVW SI, 2(CX)
+	ADDQ $0x04, CX
 	JMP  repeat_end_emit_encodeBlockAsm
 
 repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy:
-	LEAL -4(BX), BX
-	MOVW $0x0015, (AX)
-	MOVB BL, 2(AX)
-	ADDQ $0x03, AX
+	LEAL -4(SI), SI
+	MOVW $0x0015, (CX)
+	MOVB SI, 2(CX)
+	ADDQ $0x03, CX
 	JMP  repeat_end_emit_encodeBlockAsm
 
 repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy:
-	SHLL $0x02, BX
-	ORL  $0x01, BX
-	MOVW BX, (AX)
-	ADDQ $0x02, AX
+	SHLL $0x02, SI
+	ORL  $0x01, SI
+	MOVW SI, (CX)
+	ADDQ $0x02, CX
 	JMP  repeat_end_emit_encodeBlockAsm
 
 repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy:
-	XORQ DI, DI
-	LEAL 1(DI)(BX*4), BX
-	MOVB SI, 1(AX)
-	SARL $0x08, SI
-	SHLL $0x05, SI
-	ORL  SI, BX
-	MOVB BL, (AX)
-	ADDQ $0x02, AX
+	XORQ R8, R8
+	LEAL 1(R8)(SI*4), SI
+	MOVB DI, 1(CX)
+	SARL $0x08, DI
+	SHLL $0x05, DI
+	ORL  DI, SI
+	MOVB SI, (CX)
+	ADDQ $0x02, CX
 	JMP  repeat_end_emit_encodeBlockAsm
 
 four_bytes_remain_repeat_as_copy_encodeBlockAsm:
-	TESTL BX, BX
+	TESTL SI, SI
 	JZ    repeat_end_emit_encodeBlockAsm
-	XORL  DI, DI
-	LEAL  -1(DI)(BX*4), BX
-	MOVB  BL, (AX)
-	MOVL  SI, 1(AX)
-	ADDQ  $0x05, AX
+	XORL  R8, R8
+	LEAL  -1(R8)(SI*4), SI
+	MOVB  SI, (CX)
+	MOVL  DI, 1(CX)
+	ADDQ  $0x05, CX
 	JMP   repeat_end_emit_encodeBlockAsm
 
 two_byte_offset_repeat_as_copy_encodeBlockAsm:
-	CMPL BX, $0x40
+	CMPL SI, $0x40
 	JBE  two_byte_offset_short_repeat_as_copy_encodeBlockAsm
-	CMPL SI, $0x00000800
+	CMPL DI, $0x00000800
 	JAE  long_offset_short_repeat_as_copy_encodeBlockAsm
-	MOVL $0x00000001, DI
-	LEAL 16(DI), DI
-	MOVB SI, 1(AX)
-	MOVL SI, R8
-	SHRL $0x08, R8
-	SHLL $0x05, R8
-	ORL  R8, DI
-	MOVB DI, (AX)
-	ADDQ $0x02, AX
-	SUBL $0x08, BX
+	MOVL $0x00000001, R8
+	LEAL 16(R8), R8
+	MOVB DI, 1(CX)
+	MOVL DI, R9
+	SHRL $0x08, R9
+	SHLL $0x05, R9
+	ORL  R9, R8
+	MOVB R8, (CX)
+	ADDQ $0x02, CX
+	SUBL $0x08, SI
 
 	// emitRepeat
-	LEAL -4(BX), BX
+	LEAL -4(SI), SI
 	JMP  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b
 
 emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b:
-	MOVL BX, DI
-	LEAL -4(BX), BX
-	CMPL DI, $0x08
+	MOVL SI, R8
+	LEAL -4(SI), SI
+	CMPL R8, $0x08
 	JBE  repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b
-	CMPL DI, $0x0c
+	CMPL R8, $0x0c
 	JAE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b
-	CMPL SI, $0x00000800
+	CMPL DI, $0x00000800
 	JB   repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b
 
 cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b:
-	CMPL BX, $0x00000104
+	CMPL SI, $0x00000104
 	JB   repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b
-	CMPL BX, $0x00010100
+	CMPL SI, $0x00010100
 	JB   repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b
-	CMPL BX, $0x0100ffff
+	CMPL SI, $0x0100ffff
 	JB   repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b
-	LEAL -16842747(BX), BX
-	MOVL $0xfffb001d, (AX)
-	MOVB $0xff, 4(AX)
-	ADDQ $0x05, AX
+	LEAL -16842747(SI), SI
+	MOVL $0xfffb001d, (CX)
+	MOVB $0xff, 4(CX)
+	ADDQ $0x05, CX
 	JMP  emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b
 
 repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b:
-	LEAL -65536(BX), BX
-	MOVL BX, SI
-	MOVW $0x001d, (AX)
-	MOVW BX, 2(AX)
-	SARL $0x10, SI
-	MOVB SI, 4(AX)
-	ADDQ $0x05, AX
+	LEAL -65536(SI), SI
+	MOVL SI, DI
+	MOVW $0x001d, (CX)
+	MOVW SI, 2(CX)
+	SARL $0x10, DI
+	MOVB DI, 4(CX)
+	ADDQ $0x05, CX
 	JMP  repeat_end_emit_encodeBlockAsm
 
 repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b:
-	LEAL -256(BX), BX
-	MOVW $0x0019, (AX)
-	MOVW BX, 2(AX)
-	ADDQ $0x04, AX
+	LEAL -256(SI), SI
+	MOVW $0x0019, (CX)
+	MOVW SI, 2(CX)
+	ADDQ $0x04, CX
 	JMP  repeat_end_emit_encodeBlockAsm
 
 repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b:
-	LEAL -4(BX), BX
-	MOVW $0x0015, (AX)
-	MOVB BL, 2(AX)
-	ADDQ $0x03, AX
+	LEAL -4(SI), SI
+	MOVW $0x0015, (CX)
+	MOVB SI, 2(CX)
+	ADDQ $0x03, CX
 	JMP  repeat_end_emit_encodeBlockAsm
 
 repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b:
-	SHLL $0x02, BX
-	ORL  $0x01, BX
-	MOVW BX, (AX)
-	ADDQ $0x02, AX
+	SHLL $0x02, SI
+	ORL  $0x01, SI
+	MOVW SI, (CX)
+	ADDQ $0x02, CX
 	JMP  repeat_end_emit_encodeBlockAsm
 
 repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short_2b:
-	XORQ DI, DI
-	LEAL 1(DI)(BX*4), BX
-	MOVB SI, 1(AX)
-	SARL $0x08, SI
-	SHLL $0x05, SI
-	ORL  SI, BX
-	MOVB BL, (AX)
-	ADDQ $0x02, AX
+	XORQ R8, R8
+	LEAL 1(R8)(SI*4), SI
+	MOVB DI, 1(CX)
+	SARL $0x08, DI
+	SHLL $0x05, DI
+	ORL  DI, SI
+	MOVB SI, (CX)
+	ADDQ $0x02, CX
 	JMP  repeat_end_emit_encodeBlockAsm
 
 long_offset_short_repeat_as_copy_encodeBlockAsm:
-	MOVB $0xee, (AX)
-	MOVW SI, 1(AX)
-	LEAL -60(BX), BX
-	ADDQ $0x03, AX
+	MOVB $0xee, (CX)
+	MOVW DI, 1(CX)
+	LEAL -60(SI), SI
+	ADDQ $0x03, CX
 
 	// emitRepeat
 emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short:
-	MOVL BX, DI
-	LEAL -4(BX), BX
-	CMPL DI, $0x08
+	MOVL SI, R8
+	LEAL -4(SI), SI
+	CMPL R8, $0x08
 	JBE  repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short
-	CMPL DI, $0x0c
+	CMPL R8, $0x0c
 	JAE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short
-	CMPL SI, $0x00000800
+	CMPL DI, $0x00000800
 	JB   repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short
 
 cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short:
-	CMPL BX, $0x00000104
+	CMPL SI, $0x00000104
 	JB   repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short
-	CMPL BX, $0x00010100
+	CMPL SI, $0x00010100
 	JB   repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short
-	CMPL BX, $0x0100ffff
+	CMPL SI, $0x0100ffff
 	JB   repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short
-	LEAL -16842747(BX), BX
-	MOVL $0xfffb001d, (AX)
-	MOVB $0xff, 4(AX)
-	ADDQ $0x05, AX
+	LEAL -16842747(SI), SI
+	MOVL $0xfffb001d, (CX)
+	MOVB $0xff, 4(CX)
+	ADDQ $0x05, CX
 	JMP  emit_repeat_again_repeat_as_copy_encodeBlockAsm_emit_copy_short
 
 repeat_five_repeat_as_copy_encodeBlockAsm_emit_copy_short:
-	LEAL -65536(BX), BX
-	MOVL BX, SI
-	MOVW $0x001d, (AX)
-	MOVW BX, 2(AX)
-	SARL $0x10, SI
-	MOVB SI, 4(AX)
-	ADDQ $0x05, AX
+	LEAL -65536(SI), SI
+	MOVL SI, DI
+	MOVW $0x001d, (CX)
+	MOVW SI, 2(CX)
+	SARL $0x10, DI
+	MOVB DI, 4(CX)
+	ADDQ $0x05, CX
 	JMP  repeat_end_emit_encodeBlockAsm
 
 repeat_four_repeat_as_copy_encodeBlockAsm_emit_copy_short:
-	LEAL -256(BX), BX
-	MOVW $0x0019, (AX)
-	MOVW BX, 2(AX)
-	ADDQ $0x04, AX
+	LEAL -256(SI), SI
+	MOVW $0x0019, (CX)
+	MOVW SI, 2(CX)
+	ADDQ $0x04, CX
 	JMP  repeat_end_emit_encodeBlockAsm
 
 repeat_three_repeat_as_copy_encodeBlockAsm_emit_copy_short:
-	LEAL -4(BX), BX
-	MOVW $0x0015, (AX)
-	MOVB BL, 2(AX)
-	ADDQ $0x03, AX
+	LEAL -4(SI), SI
+	MOVW $0x0015, (CX)
+	MOVB SI, 2(CX)
+	ADDQ $0x03, CX
 	JMP  repeat_end_emit_encodeBlockAsm
 
 repeat_two_repeat_as_copy_encodeBlockAsm_emit_copy_short:
-	SHLL $0x02, BX
-	ORL  $0x01, BX
-	MOVW BX, (AX)
-	ADDQ $0x02, AX
+	SHLL $0x02, SI
+	ORL  $0x01, SI
+	MOVW SI, (CX)
+	ADDQ $0x02, CX
 	JMP  repeat_end_emit_encodeBlockAsm
 
 repeat_two_offset_repeat_as_copy_encodeBlockAsm_emit_copy_short:
-	XORQ DI, DI
-	LEAL 1(DI)(BX*4), BX
-	MOVB SI, 1(AX)
-	SARL $0x08, SI
-	SHLL $0x05, SI
-	ORL  SI, BX
-	MOVB BL, (AX)
-	ADDQ $0x02, AX
+	XORQ R8, R8
+	LEAL 1(R8)(SI*4), SI
+	MOVB DI, 1(CX)
+	SARL $0x08, DI
+	SHLL $0x05, DI
+	ORL  DI, SI
+	MOVB SI, (CX)
+	ADDQ $0x02, CX
 	JMP  repeat_end_emit_encodeBlockAsm
 
 two_byte_offset_short_repeat_as_copy_encodeBlockAsm:
-	MOVL BX, DI
-	SHLL $0x02, DI
-	CMPL BX, $0x0c
+	MOVL SI, R8
+	SHLL $0x02, R8
+	CMPL SI, $0x0c
 	JAE  emit_copy_three_repeat_as_copy_encodeBlockAsm
-	CMPL SI, $0x00000800
+	CMPL DI, $0x00000800
 	JAE  emit_copy_three_repeat_as_copy_encodeBlockAsm
-	LEAL -15(DI), DI
-	MOVB SI, 1(AX)
-	SHRL $0x08, SI
-	SHLL $0x05, SI
-	ORL  SI, DI
-	MOVB DI, (AX)
-	ADDQ $0x02, AX
+	LEAL -15(R8), R8
+	MOVB DI, 1(CX)
+	SHRL $0x08, DI
+	SHLL $0x05, DI
+	ORL  DI, R8
+	MOVB R8, (CX)
+	ADDQ $0x02, CX
 	JMP  repeat_end_emit_encodeBlockAsm
 
 emit_copy_three_repeat_as_copy_encodeBlockAsm:
-	LEAL -2(DI), DI
-	MOVB DI, (AX)
-	MOVW SI, 1(AX)
-	ADDQ $0x03, AX
+	LEAL -2(R8), R8
+	MOVB R8, (CX)
+	MOVW DI, 1(CX)
+	ADDQ $0x03, CX
 
 repeat_end_emit_encodeBlockAsm:
-	MOVL CX, 12(SP)
+	MOVL DX, 12(SP)
 	JMP  search_loop_encodeBlockAsm
 
 no_repeat_found_encodeBlockAsm:
-	CMPL (DX)(BX*1), SI
+	CMPL (BX)(SI*1), DI
 	JEQ  candidate_match_encodeBlockAsm
-	SHRQ $0x08, SI
-	MOVL 24(SP)(R9*4), BX
-	LEAL 2(CX), R8
-	CMPL (DX)(DI*1), SI
+	SHRQ $0x08, DI
+	MOVL (AX)(R10*4), SI
+	LEAL 2(DX), R9
+	CMPL (BX)(R8*1), DI
 	JEQ  candidate2_match_encodeBlockAsm
-	MOVL R8, 24(SP)(R9*4)
-	SHRQ $0x08, SI
-	CMPL (DX)(BX*1), SI
+	MOVL R9, (AX)(R10*4)
+	SHRQ $0x08, DI
+	CMPL (BX)(SI*1), DI
 	JEQ  candidate3_match_encodeBlockAsm
-	MOVL 20(SP), CX
+	MOVL 20(SP), DX
 	JMP  search_loop_encodeBlockAsm
 
 candidate3_match_encodeBlockAsm:
-	ADDL $0x02, CX
+	ADDL $0x02, DX
 	JMP  candidate_match_encodeBlockAsm
 
 candidate2_match_encodeBlockAsm:
-	MOVL R8, 24(SP)(R9*4)
-	INCL CX
-	MOVL DI, BX
+	MOVL R9, (AX)(R10*4)
+	INCL DX
+	MOVL R8, SI
 
 candidate_match_encodeBlockAsm:
-	MOVL  12(SP), SI
-	TESTL BX, BX
+	MOVL  12(SP), DI
+	TESTL SI, SI
 	JZ    match_extend_back_end_encodeBlockAsm
 
 match_extend_back_loop_encodeBlockAsm:
-	CMPL CX, SI
+	CMPL DX, DI
 	JBE  match_extend_back_end_encodeBlockAsm
-	MOVB -1(DX)(BX*1), DI
-	MOVB -1(DX)(CX*1), R8
-	CMPB DI, R8
+	MOVB -1(BX)(SI*1), R8
+	MOVB -1(BX)(DX*1), R9
+	CMPB R8, R9
 	JNE  match_extend_back_end_encodeBlockAsm
-	LEAL -1(CX), CX
-	DECL BX
+	LEAL -1(DX), DX
+	DECL SI
 	JZ   match_extend_back_end_encodeBlockAsm
 	JMP  match_extend_back_loop_encodeBlockAsm
 
 match_extend_back_end_encodeBlockAsm:
-	MOVL CX, SI
-	SUBL 12(SP), SI
-	LEAQ 5(AX)(SI*1), SI
-	CMPQ SI, (SP)
+	MOVL DX, DI
+	SUBL 12(SP), DI
+	LEAQ 5(CX)(DI*1), DI
+	CMPQ DI, (SP)
 	JB   match_dst_size_check_encodeBlockAsm
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 match_dst_size_check_encodeBlockAsm:
-	MOVL CX, SI
-	MOVL 12(SP), DI
-	CMPL DI, SI
+	MOVL DX, DI
+	MOVL 12(SP), R8
+	CMPL R8, DI
 	JEQ  emit_literal_done_match_emit_encodeBlockAsm
-	MOVL SI, R8
-	MOVL SI, 12(SP)
-	LEAQ (DX)(DI*1), SI
-	SUBL DI, R8
-	LEAL -1(R8), DI
-	CMPL DI, $0x3c
+	MOVL DI, R9
+	MOVL DI, 12(SP)
+	LEAQ (BX)(R8*1), DI
+	SUBL R8, R9
+	LEAL -1(R9), R8
+	CMPL R8, $0x3c
 	JB   one_byte_match_emit_encodeBlockAsm
-	CMPL DI, $0x00000100
+	CMPL R8, $0x00000100
 	JB   two_bytes_match_emit_encodeBlockAsm
-	CMPL DI, $0x00010000
+	CMPL R8, $0x00010000
 	JB   three_bytes_match_emit_encodeBlockAsm
-	CMPL DI, $0x01000000
+	CMPL R8, $0x01000000
 	JB   four_bytes_match_emit_encodeBlockAsm
-	MOVB $0xfc, (AX)
-	MOVL DI, 1(AX)
-	ADDQ $0x05, AX
+	MOVB $0xfc, (CX)
+	MOVL R8, 1(CX)
+	ADDQ $0x05, CX
 	JMP  memmove_long_match_emit_encodeBlockAsm
 
 four_bytes_match_emit_encodeBlockAsm:
-	MOVL DI, R9
-	SHRL $0x10, R9
-	MOVB $0xf8, (AX)
-	MOVW DI, 1(AX)
-	MOVB R9, 3(AX)
-	ADDQ $0x04, AX
+	MOVL R8, R10
+	SHRL $0x10, R10
+	MOVB $0xf8, (CX)
+	MOVW R8, 1(CX)
+	MOVB R10, 3(CX)
+	ADDQ $0x04, CX
 	JMP  memmove_long_match_emit_encodeBlockAsm
 
 three_bytes_match_emit_encodeBlockAsm:
-	MOVB $0xf4, (AX)
-	MOVW DI, 1(AX)
-	ADDQ $0x03, AX
+	MOVB $0xf4, (CX)
+	MOVW R8, 1(CX)
+	ADDQ $0x03, CX
 	JMP  memmove_long_match_emit_encodeBlockAsm
 
 two_bytes_match_emit_encodeBlockAsm:
-	MOVB $0xf0, (AX)
-	MOVB DI, 1(AX)
-	ADDQ $0x02, AX
-	CMPL DI, $0x40
+	MOVB $0xf0, (CX)
+	MOVB R8, 1(CX)
+	ADDQ $0x02, CX
+	CMPL R8, $0x40
 	JB   memmove_match_emit_encodeBlockAsm
 	JMP  memmove_long_match_emit_encodeBlockAsm
 
 one_byte_match_emit_encodeBlockAsm:
-	SHLB $0x02, DI
-	MOVB DI, (AX)
-	ADDQ $0x01, AX
+	SHLB $0x02, R8
+	MOVB R8, (CX)
+	ADDQ $0x01, CX
 
 memmove_match_emit_encodeBlockAsm:
-	LEAQ (AX)(R8*1), DI
+	LEAQ (CX)(R9*1), R8
 
 	// genMemMoveShort
-	CMPQ R8, $0x08
+	CMPQ R9, $0x08
 	JBE  emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8
-	CMPQ R8, $0x10
+	CMPQ R9, $0x10
 	JBE  emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8through16
-	CMPQ R8, $0x20
+	CMPQ R9, $0x20
 	JBE  emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_17through32
 	JMP  emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_33through64
 
 emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8:
-	MOVQ (SI), R9
-	MOVQ R9, (AX)
+	MOVQ (DI), R10
+	MOVQ R10, (CX)
 	JMP  memmove_end_copy_match_emit_encodeBlockAsm
 
 emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_8through16:
-	MOVQ (SI), R9
-	MOVQ -8(SI)(R8*1), SI
-	MOVQ R9, (AX)
-	MOVQ SI, -8(AX)(R8*1)
+	MOVQ (DI), R10
+	MOVQ -8(DI)(R9*1), DI
+	MOVQ R10, (CX)
+	MOVQ DI, -8(CX)(R9*1)
 	JMP  memmove_end_copy_match_emit_encodeBlockAsm
 
 emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_17through32:
-	MOVOU (SI), X0
-	MOVOU -16(SI)(R8*1), X1
-	MOVOU X0, (AX)
-	MOVOU X1, -16(AX)(R8*1)
+	MOVOU (DI), X0
+	MOVOU -16(DI)(R9*1), X1
+	MOVOU X0, (CX)
+	MOVOU X1, -16(CX)(R9*1)
 	JMP   memmove_end_copy_match_emit_encodeBlockAsm
 
 emit_lit_memmove_match_emit_encodeBlockAsm_memmove_move_33through64:
-	MOVOU (SI), X0
-	MOVOU 16(SI), X1
-	MOVOU -32(SI)(R8*1), X2
-	MOVOU -16(SI)(R8*1), X3
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(R8*1)
-	MOVOU X3, -16(AX)(R8*1)
+	MOVOU (DI), X0
+	MOVOU 16(DI), X1
+	MOVOU -32(DI)(R9*1), X2
+	MOVOU -16(DI)(R9*1), X3
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R9*1)
+	MOVOU X3, -16(CX)(R9*1)
 
 memmove_end_copy_match_emit_encodeBlockAsm:
-	MOVQ DI, AX
+	MOVQ R8, CX
 	JMP  emit_literal_done_match_emit_encodeBlockAsm
 
 memmove_long_match_emit_encodeBlockAsm:
-	LEAQ (AX)(R8*1), DI
+	LEAQ (CX)(R9*1), R8
 
 	// genMemMoveLong
-	MOVOU (SI), X0
-	MOVOU 16(SI), X1
-	MOVOU -32(SI)(R8*1), X2
-	MOVOU -16(SI)(R8*1), X3
-	MOVQ  R8, R10
-	SHRQ  $0x05, R10
-	MOVQ  AX, R9
-	ANDL  $0x0000001f, R9
-	MOVQ  $0x00000040, R11
-	SUBQ  R9, R11
-	DECQ  R10
+	MOVOU (DI), X0
+	MOVOU 16(DI), X1
+	MOVOU -32(DI)(R9*1), X2
+	MOVOU -16(DI)(R9*1), X3
+	MOVQ  R9, R11
+	SHRQ  $0x05, R11
+	MOVQ  CX, R10
+	ANDL  $0x0000001f, R10
+	MOVQ  $0x00000040, R12
+	SUBQ  R10, R12
+	DECQ  R11
 	JA    emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32
-	LEAQ  -32(SI)(R11*1), R9
-	LEAQ  -32(AX)(R11*1), R12
+	LEAQ  -32(DI)(R12*1), R10
+	LEAQ  -32(CX)(R12*1), R13
 
 emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_big_loop_back:
-	MOVOU (R9), X4
-	MOVOU 16(R9), X5
-	MOVOA X4, (R12)
-	MOVOA X5, 16(R12)
+	MOVOU (R10), X4
+	MOVOU 16(R10), X5
+	MOVOA X4, (R13)
+	MOVOA X5, 16(R13)
+	ADDQ  $0x20, R13
+	ADDQ  $0x20, R10
 	ADDQ  $0x20, R12
-	ADDQ  $0x20, R9
-	ADDQ  $0x20, R11
-	DECQ  R10
+	DECQ  R11
 	JNA   emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_big_loop_back
 
 emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32:
-	MOVOU -32(SI)(R11*1), X4
-	MOVOU -16(SI)(R11*1), X5
-	MOVOA X4, -32(AX)(R11*1)
-	MOVOA X5, -16(AX)(R11*1)
-	ADDQ  $0x20, R11
-	CMPQ  R8, R11
+	MOVOU -32(DI)(R12*1), X4
+	MOVOU -16(DI)(R12*1), X5
+	MOVOA X4, -32(CX)(R12*1)
+	MOVOA X5, -16(CX)(R12*1)
+	ADDQ  $0x20, R12
+	CMPQ  R9, R12
 	JAE   emit_lit_memmove_long_match_emit_encodeBlockAsmlarge_forward_sse_loop_32
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(R8*1)
-	MOVOU X3, -16(AX)(R8*1)
-	MOVQ  DI, AX
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R9*1)
+	MOVOU X3, -16(CX)(R9*1)
+	MOVQ  R8, CX
 
 emit_literal_done_match_emit_encodeBlockAsm:
 match_nolit_loop_encodeBlockAsm:
-	MOVL CX, SI
-	SUBL BX, SI
-	MOVL SI, 16(SP)
-	ADDL $0x04, CX
-	ADDL $0x04, BX
-	MOVQ src_len+32(FP), SI
-	SUBL CX, SI
-	LEAQ (DX)(CX*1), DI
-	LEAQ (DX)(BX*1), BX
+	MOVL DX, DI
+	SUBL SI, DI
+	MOVL DI, 16(SP)
+	ADDL $0x04, DX
+	ADDL $0x04, SI
+	MOVQ src_len+32(FP), DI
+	SUBL DX, DI
+	LEAQ (BX)(DX*1), R8
+	LEAQ (BX)(SI*1), SI
 
 	// matchLen
-	XORL R9, R9
+	XORL R10, R10
 
 matchlen_loopback_16_match_nolit_encodeBlockAsm:
-	CMPL SI, $0x10
+	CMPL DI, $0x10
 	JB   matchlen_match8_match_nolit_encodeBlockAsm
-	MOVQ (DI)(R9*1), R8
-	MOVQ 8(DI)(R9*1), R10
-	XORQ (BX)(R9*1), R8
+	MOVQ (R8)(R10*1), R9
+	MOVQ 8(R8)(R10*1), R11
+	XORQ (SI)(R10*1), R9
 	JNZ  matchlen_bsf_8_match_nolit_encodeBlockAsm
-	XORQ 8(BX)(R9*1), R10
+	XORQ 8(SI)(R10*1), R11
 	JNZ  matchlen_bsf_16match_nolit_encodeBlockAsm
-	LEAL -16(SI), SI
-	LEAL 16(R9), R9
+	LEAL -16(DI), DI
+	LEAL 16(R10), R10
 	JMP  matchlen_loopback_16_match_nolit_encodeBlockAsm
 
 matchlen_bsf_16match_nolit_encodeBlockAsm:
 #ifdef GOAMD64_v3
-	TZCNTQ R10, R10
+	TZCNTQ R11, R11
 
 #else
-	BSFQ R10, R10
+	BSFQ R11, R11
 
 #endif
-	SARQ $0x03, R10
-	LEAL 8(R9)(R10*1), R9
+	SARQ $0x03, R11
+	LEAL 8(R10)(R11*1), R10
 	JMP  match_nolit_end_encodeBlockAsm
 
 matchlen_match8_match_nolit_encodeBlockAsm:
-	CMPL SI, $0x08
+	CMPL DI, $0x08
 	JB   matchlen_match4_match_nolit_encodeBlockAsm
-	MOVQ (DI)(R9*1), R8
-	XORQ (BX)(R9*1), R8
+	MOVQ (R8)(R10*1), R9
+	XORQ (SI)(R10*1), R9
 	JNZ  matchlen_bsf_8_match_nolit_encodeBlockAsm
-	LEAL -8(SI), SI
-	LEAL 8(R9), R9
+	LEAL -8(DI), DI
+	LEAL 8(R10), R10
 	JMP  matchlen_match4_match_nolit_encodeBlockAsm
 
 matchlen_bsf_8_match_nolit_encodeBlockAsm:
 #ifdef GOAMD64_v3
-	TZCNTQ R8, R8
+	TZCNTQ R9, R9
 
 #else
-	BSFQ R8, R8
+	BSFQ R9, R9
 
 #endif
-	SARQ $0x03, R8
-	LEAL (R9)(R8*1), R9
+	SARQ $0x03, R9
+	LEAL (R10)(R9*1), R10
 	JMP  match_nolit_end_encodeBlockAsm
 
 matchlen_match4_match_nolit_encodeBlockAsm:
-	CMPL SI, $0x04
+	CMPL DI, $0x04
 	JB   matchlen_match2_match_nolit_encodeBlockAsm
-	MOVL (DI)(R9*1), R8
-	CMPL (BX)(R9*1), R8
+	MOVL (R8)(R10*1), R9
+	CMPL (SI)(R10*1), R9
 	JNE  matchlen_match2_match_nolit_encodeBlockAsm
-	LEAL -4(SI), SI
-	LEAL 4(R9), R9
+	LEAL -4(DI), DI
+	LEAL 4(R10), R10
 
 matchlen_match2_match_nolit_encodeBlockAsm:
-	CMPL SI, $0x01
+	CMPL DI, $0x01
 	JE   matchlen_match1_match_nolit_encodeBlockAsm
 	JB   match_nolit_end_encodeBlockAsm
-	MOVW (DI)(R9*1), R8
-	CMPW (BX)(R9*1), R8
+	MOVW (R8)(R10*1), R9
+	CMPW (SI)(R10*1), R9
 	JNE  matchlen_match1_match_nolit_encodeBlockAsm
-	LEAL 2(R9), R9
-	SUBL $0x02, SI
+	LEAL 2(R10), R10
+	SUBL $0x02, DI
 	JZ   match_nolit_end_encodeBlockAsm
 
 matchlen_match1_match_nolit_encodeBlockAsm:
-	MOVB (DI)(R9*1), R8
-	CMPB (BX)(R9*1), R8
+	MOVB (R8)(R10*1), R9
+	CMPB (SI)(R10*1), R9
 	JNE  match_nolit_end_encodeBlockAsm
-	LEAL 1(R9), R9
+	LEAL 1(R10), R10
 
 match_nolit_end_encodeBlockAsm:
-	ADDL R9, CX
-	MOVL 16(SP), BX
-	ADDL $0x04, R9
-	MOVL CX, 12(SP)
+	ADDL R10, DX
+	MOVL 16(SP), SI
+	ADDL $0x04, R10
+	MOVL DX, 12(SP)
 
 	// emitCopy
-	CMPL BX, $0x00010000
+	CMPL SI, $0x00010000
 	JB   two_byte_offset_match_nolit_encodeBlockAsm
-	CMPL R9, $0x40
+	CMPL R10, $0x40
 	JBE  four_bytes_remain_match_nolit_encodeBlockAsm
-	MOVB $0xff, (AX)
-	MOVL BX, 1(AX)
-	LEAL -64(R9), R9
-	ADDQ $0x05, AX
-	CMPL R9, $0x04
+	MOVB $0xff, (CX)
+	MOVL SI, 1(CX)
+	LEAL -64(R10), R10
+	ADDQ $0x05, CX
+	CMPL R10, $0x04
 	JB   four_bytes_remain_match_nolit_encodeBlockAsm
 
 	// emitRepeat
 emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy:
-	MOVL R9, SI
-	LEAL -4(R9), R9
-	CMPL SI, $0x08
+	MOVL R10, DI
+	LEAL -4(R10), R10
+	CMPL DI, $0x08
 	JBE  repeat_two_match_nolit_encodeBlockAsm_emit_copy
-	CMPL SI, $0x0c
+	CMPL DI, $0x0c
 	JAE  cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy
-	CMPL BX, $0x00000800
+	CMPL SI, $0x00000800
 	JB   repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy
 
 cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy:
-	CMPL R9, $0x00000104
+	CMPL R10, $0x00000104
 	JB   repeat_three_match_nolit_encodeBlockAsm_emit_copy
-	CMPL R9, $0x00010100
+	CMPL R10, $0x00010100
 	JB   repeat_four_match_nolit_encodeBlockAsm_emit_copy
-	CMPL R9, $0x0100ffff
+	CMPL R10, $0x0100ffff
 	JB   repeat_five_match_nolit_encodeBlockAsm_emit_copy
-	LEAL -16842747(R9), R9
-	MOVL $0xfffb001d, (AX)
-	MOVB $0xff, 4(AX)
-	ADDQ $0x05, AX
+	LEAL -16842747(R10), R10
+	MOVL $0xfffb001d, (CX)
+	MOVB $0xff, 4(CX)
+	ADDQ $0x05, CX
 	JMP  emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy
 
 repeat_five_match_nolit_encodeBlockAsm_emit_copy:
-	LEAL -65536(R9), R9
-	MOVL R9, BX
-	MOVW $0x001d, (AX)
-	MOVW R9, 2(AX)
-	SARL $0x10, BX
-	MOVB BL, 4(AX)
-	ADDQ $0x05, AX
+	LEAL -65536(R10), R10
+	MOVL R10, SI
+	MOVW $0x001d, (CX)
+	MOVW R10, 2(CX)
+	SARL $0x10, SI
+	MOVB SI, 4(CX)
+	ADDQ $0x05, CX
 	JMP  match_nolit_emitcopy_end_encodeBlockAsm
 
 repeat_four_match_nolit_encodeBlockAsm_emit_copy:
-	LEAL -256(R9), R9
-	MOVW $0x0019, (AX)
-	MOVW R9, 2(AX)
-	ADDQ $0x04, AX
+	LEAL -256(R10), R10
+	MOVW $0x0019, (CX)
+	MOVW R10, 2(CX)
+	ADDQ $0x04, CX
 	JMP  match_nolit_emitcopy_end_encodeBlockAsm
 
 repeat_three_match_nolit_encodeBlockAsm_emit_copy:
-	LEAL -4(R9), R9
-	MOVW $0x0015, (AX)
-	MOVB R9, 2(AX)
-	ADDQ $0x03, AX
+	LEAL -4(R10), R10
+	MOVW $0x0015, (CX)
+	MOVB R10, 2(CX)
+	ADDQ $0x03, CX
 	JMP  match_nolit_emitcopy_end_encodeBlockAsm
 
 repeat_two_match_nolit_encodeBlockAsm_emit_copy:
-	SHLL $0x02, R9
-	ORL  $0x01, R9
-	MOVW R9, (AX)
-	ADDQ $0x02, AX
+	SHLL $0x02, R10
+	ORL  $0x01, R10
+	MOVW R10, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeBlockAsm
 
 repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy:
-	XORQ SI, SI
-	LEAL 1(SI)(R9*4), R9
-	MOVB BL, 1(AX)
-	SARL $0x08, BX
-	SHLL $0x05, BX
-	ORL  BX, R9
-	MOVB R9, (AX)
-	ADDQ $0x02, AX
+	XORQ DI, DI
+	LEAL 1(DI)(R10*4), R10
+	MOVB SI, 1(CX)
+	SARL $0x08, SI
+	SHLL $0x05, SI
+	ORL  SI, R10
+	MOVB R10, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeBlockAsm
 
 four_bytes_remain_match_nolit_encodeBlockAsm:
-	TESTL R9, R9
+	TESTL R10, R10
 	JZ    match_nolit_emitcopy_end_encodeBlockAsm
-	XORL  SI, SI
-	LEAL  -1(SI)(R9*4), R9
-	MOVB  R9, (AX)
-	MOVL  BX, 1(AX)
-	ADDQ  $0x05, AX
+	XORL  DI, DI
+	LEAL  -1(DI)(R10*4), R10
+	MOVB  R10, (CX)
+	MOVL  SI, 1(CX)
+	ADDQ  $0x05, CX
 	JMP   match_nolit_emitcopy_end_encodeBlockAsm
 
 two_byte_offset_match_nolit_encodeBlockAsm:
-	CMPL R9, $0x40
+	CMPL R10, $0x40
 	JBE  two_byte_offset_short_match_nolit_encodeBlockAsm
-	CMPL BX, $0x00000800
+	CMPL SI, $0x00000800
 	JAE  long_offset_short_match_nolit_encodeBlockAsm
-	MOVL $0x00000001, SI
-	LEAL 16(SI), SI
-	MOVB BL, 1(AX)
-	MOVL BX, DI
-	SHRL $0x08, DI
-	SHLL $0x05, DI
-	ORL  DI, SI
-	MOVB SI, (AX)
-	ADDQ $0x02, AX
-	SUBL $0x08, R9
+	MOVL $0x00000001, DI
+	LEAL 16(DI), DI
+	MOVB SI, 1(CX)
+	MOVL SI, R8
+	SHRL $0x08, R8
+	SHLL $0x05, R8
+	ORL  R8, DI
+	MOVB DI, (CX)
+	ADDQ $0x02, CX
+	SUBL $0x08, R10
 
 	// emitRepeat
-	LEAL -4(R9), R9
+	LEAL -4(R10), R10
 	JMP  cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b
 
 emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short_2b:
-	MOVL R9, SI
-	LEAL -4(R9), R9
-	CMPL SI, $0x08
+	MOVL R10, DI
+	LEAL -4(R10), R10
+	CMPL DI, $0x08
 	JBE  repeat_two_match_nolit_encodeBlockAsm_emit_copy_short_2b
-	CMPL SI, $0x0c
+	CMPL DI, $0x0c
 	JAE  cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b
-	CMPL BX, $0x00000800
+	CMPL SI, $0x00000800
 	JB   repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b
 
 cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b:
-	CMPL R9, $0x00000104
+	CMPL R10, $0x00000104
 	JB   repeat_three_match_nolit_encodeBlockAsm_emit_copy_short_2b
-	CMPL R9, $0x00010100
+	CMPL R10, $0x00010100
 	JB   repeat_four_match_nolit_encodeBlockAsm_emit_copy_short_2b
-	CMPL R9, $0x0100ffff
+	CMPL R10, $0x0100ffff
 	JB   repeat_five_match_nolit_encodeBlockAsm_emit_copy_short_2b
-	LEAL -16842747(R9), R9
-	MOVL $0xfffb001d, (AX)
-	MOVB $0xff, 4(AX)
-	ADDQ $0x05, AX
+	LEAL -16842747(R10), R10
+	MOVL $0xfffb001d, (CX)
+	MOVB $0xff, 4(CX)
+	ADDQ $0x05, CX
 	JMP  emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short_2b
 
 repeat_five_match_nolit_encodeBlockAsm_emit_copy_short_2b:
-	LEAL -65536(R9), R9
-	MOVL R9, BX
-	MOVW $0x001d, (AX)
-	MOVW R9, 2(AX)
-	SARL $0x10, BX
-	MOVB BL, 4(AX)
-	ADDQ $0x05, AX
+	LEAL -65536(R10), R10
+	MOVL R10, SI
+	MOVW $0x001d, (CX)
+	MOVW R10, 2(CX)
+	SARL $0x10, SI
+	MOVB SI, 4(CX)
+	ADDQ $0x05, CX
 	JMP  match_nolit_emitcopy_end_encodeBlockAsm
 
 repeat_four_match_nolit_encodeBlockAsm_emit_copy_short_2b:
-	LEAL -256(R9), R9
-	MOVW $0x0019, (AX)
-	MOVW R9, 2(AX)
-	ADDQ $0x04, AX
+	LEAL -256(R10), R10
+	MOVW $0x0019, (CX)
+	MOVW R10, 2(CX)
+	ADDQ $0x04, CX
 	JMP  match_nolit_emitcopy_end_encodeBlockAsm
 
 repeat_three_match_nolit_encodeBlockAsm_emit_copy_short_2b:
-	LEAL -4(R9), R9
-	MOVW $0x0015, (AX)
-	MOVB R9, 2(AX)
-	ADDQ $0x03, AX
+	LEAL -4(R10), R10
+	MOVW $0x0015, (CX)
+	MOVB R10, 2(CX)
+	ADDQ $0x03, CX
 	JMP  match_nolit_emitcopy_end_encodeBlockAsm
 
 repeat_two_match_nolit_encodeBlockAsm_emit_copy_short_2b:
-	SHLL $0x02, R9
-	ORL  $0x01, R9
-	MOVW R9, (AX)
-	ADDQ $0x02, AX
+	SHLL $0x02, R10
+	ORL  $0x01, R10
+	MOVW R10, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeBlockAsm
 
 repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short_2b:
-	XORQ SI, SI
-	LEAL 1(SI)(R9*4), R9
-	MOVB BL, 1(AX)
-	SARL $0x08, BX
-	SHLL $0x05, BX
-	ORL  BX, R9
-	MOVB R9, (AX)
-	ADDQ $0x02, AX
+	XORQ DI, DI
+	LEAL 1(DI)(R10*4), R10
+	MOVB SI, 1(CX)
+	SARL $0x08, SI
+	SHLL $0x05, SI
+	ORL  SI, R10
+	MOVB R10, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeBlockAsm
 
 long_offset_short_match_nolit_encodeBlockAsm:
-	MOVB $0xee, (AX)
-	MOVW BX, 1(AX)
-	LEAL -60(R9), R9
-	ADDQ $0x03, AX
+	MOVB $0xee, (CX)
+	MOVW SI, 1(CX)
+	LEAL -60(R10), R10
+	ADDQ $0x03, CX
 
 	// emitRepeat
 emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short:
-	MOVL R9, SI
-	LEAL -4(R9), R9
-	CMPL SI, $0x08
+	MOVL R10, DI
+	LEAL -4(R10), R10
+	CMPL DI, $0x08
 	JBE  repeat_two_match_nolit_encodeBlockAsm_emit_copy_short
-	CMPL SI, $0x0c
+	CMPL DI, $0x0c
 	JAE  cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short
-	CMPL BX, $0x00000800
+	CMPL SI, $0x00000800
 	JB   repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short
 
 cant_repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short:
-	CMPL R9, $0x00000104
+	CMPL R10, $0x00000104
 	JB   repeat_three_match_nolit_encodeBlockAsm_emit_copy_short
-	CMPL R9, $0x00010100
+	CMPL R10, $0x00010100
 	JB   repeat_four_match_nolit_encodeBlockAsm_emit_copy_short
-	CMPL R9, $0x0100ffff
+	CMPL R10, $0x0100ffff
 	JB   repeat_five_match_nolit_encodeBlockAsm_emit_copy_short
-	LEAL -16842747(R9), R9
-	MOVL $0xfffb001d, (AX)
-	MOVB $0xff, 4(AX)
-	ADDQ $0x05, AX
+	LEAL -16842747(R10), R10
+	MOVL $0xfffb001d, (CX)
+	MOVB $0xff, 4(CX)
+	ADDQ $0x05, CX
 	JMP  emit_repeat_again_match_nolit_encodeBlockAsm_emit_copy_short
 
 repeat_five_match_nolit_encodeBlockAsm_emit_copy_short:
-	LEAL -65536(R9), R9
-	MOVL R9, BX
-	MOVW $0x001d, (AX)
-	MOVW R9, 2(AX)
-	SARL $0x10, BX
-	MOVB BL, 4(AX)
-	ADDQ $0x05, AX
+	LEAL -65536(R10), R10
+	MOVL R10, SI
+	MOVW $0x001d, (CX)
+	MOVW R10, 2(CX)
+	SARL $0x10, SI
+	MOVB SI, 4(CX)
+	ADDQ $0x05, CX
 	JMP  match_nolit_emitcopy_end_encodeBlockAsm
 
 repeat_four_match_nolit_encodeBlockAsm_emit_copy_short:
-	LEAL -256(R9), R9
-	MOVW $0x0019, (AX)
-	MOVW R9, 2(AX)
-	ADDQ $0x04, AX
+	LEAL -256(R10), R10
+	MOVW $0x0019, (CX)
+	MOVW R10, 2(CX)
+	ADDQ $0x04, CX
 	JMP  match_nolit_emitcopy_end_encodeBlockAsm
 
 repeat_three_match_nolit_encodeBlockAsm_emit_copy_short:
-	LEAL -4(R9), R9
-	MOVW $0x0015, (AX)
-	MOVB R9, 2(AX)
-	ADDQ $0x03, AX
+	LEAL -4(R10), R10
+	MOVW $0x0015, (CX)
+	MOVB R10, 2(CX)
+	ADDQ $0x03, CX
 	JMP  match_nolit_emitcopy_end_encodeBlockAsm
 
 repeat_two_match_nolit_encodeBlockAsm_emit_copy_short:
-	SHLL $0x02, R9
-	ORL  $0x01, R9
-	MOVW R9, (AX)
-	ADDQ $0x02, AX
+	SHLL $0x02, R10
+	ORL  $0x01, R10
+	MOVW R10, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeBlockAsm
 
 repeat_two_offset_match_nolit_encodeBlockAsm_emit_copy_short:
-	XORQ SI, SI
-	LEAL 1(SI)(R9*4), R9
-	MOVB BL, 1(AX)
-	SARL $0x08, BX
-	SHLL $0x05, BX
-	ORL  BX, R9
-	MOVB R9, (AX)
-	ADDQ $0x02, AX
+	XORQ DI, DI
+	LEAL 1(DI)(R10*4), R10
+	MOVB SI, 1(CX)
+	SARL $0x08, SI
+	SHLL $0x05, SI
+	ORL  SI, R10
+	MOVB R10, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeBlockAsm
 
 two_byte_offset_short_match_nolit_encodeBlockAsm:
-	MOVL R9, SI
-	SHLL $0x02, SI
-	CMPL R9, $0x0c
+	MOVL R10, DI
+	SHLL $0x02, DI
+	CMPL R10, $0x0c
 	JAE  emit_copy_three_match_nolit_encodeBlockAsm
-	CMPL BX, $0x00000800
+	CMPL SI, $0x00000800
 	JAE  emit_copy_three_match_nolit_encodeBlockAsm
-	LEAL -15(SI), SI
-	MOVB BL, 1(AX)
-	SHRL $0x08, BX
-	SHLL $0x05, BX
-	ORL  BX, SI
-	MOVB SI, (AX)
-	ADDQ $0x02, AX
+	LEAL -15(DI), DI
+	MOVB SI, 1(CX)
+	SHRL $0x08, SI
+	SHLL $0x05, SI
+	ORL  SI, DI
+	MOVB DI, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeBlockAsm
 
 emit_copy_three_match_nolit_encodeBlockAsm:
-	LEAL -2(SI), SI
-	MOVB SI, (AX)
-	MOVW BX, 1(AX)
-	ADDQ $0x03, AX
+	LEAL -2(DI), DI
+	MOVB DI, (CX)
+	MOVW SI, 1(CX)
+	ADDQ $0x03, CX
 
 match_nolit_emitcopy_end_encodeBlockAsm:
-	CMPL CX, 8(SP)
+	CMPL DX, 8(SP)
 	JAE  emit_remainder_encodeBlockAsm
-	MOVQ -2(DX)(CX*1), SI
-	CMPQ AX, (SP)
+	MOVQ -2(BX)(DX*1), DI
+	CMPQ CX, (SP)
 	JB   match_nolit_dst_ok_encodeBlockAsm
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 match_nolit_dst_ok_encodeBlockAsm:
-	MOVQ  $0x0000cf1bbcdcbf9b, R8
-	MOVQ  SI, DI
-	SHRQ  $0x10, SI
-	MOVQ  SI, BX
-	SHLQ  $0x10, DI
-	IMULQ R8, DI
-	SHRQ  $0x32, DI
-	SHLQ  $0x10, BX
-	IMULQ R8, BX
-	SHRQ  $0x32, BX
-	LEAL  -2(CX), R8
-	LEAQ  24(SP)(BX*4), R9
-	MOVL  (R9), BX
-	MOVL  R8, 24(SP)(DI*4)
-	MOVL  CX, (R9)
-	CMPL  (DX)(BX*1), SI
+	MOVQ  $0x0000cf1bbcdcbf9b, R9
+	MOVQ  DI, R8
+	SHRQ  $0x10, DI
+	MOVQ  DI, SI
+	SHLQ  $0x10, R8
+	IMULQ R9, R8
+	SHRQ  $0x32, R8
+	SHLQ  $0x10, SI
+	IMULQ R9, SI
+	SHRQ  $0x32, SI
+	LEAL  -2(DX), R9
+	LEAQ  (AX)(SI*4), R10
+	MOVL  (R10), SI
+	MOVL  R9, (AX)(R8*4)
+	MOVL  DX, (R10)
+	CMPL  (BX)(SI*1), DI
 	JEQ   match_nolit_loop_encodeBlockAsm
-	INCL  CX
+	INCL  DX
 	JMP   search_loop_encodeBlockAsm
 
 emit_remainder_encodeBlockAsm:
-	MOVQ src_len+32(FP), CX
-	SUBL 12(SP), CX
-	LEAQ 5(AX)(CX*1), CX
-	CMPQ CX, (SP)
+	MOVQ src_len+32(FP), AX
+	SUBL 12(SP), AX
+	LEAQ 5(CX)(AX*1), AX
+	CMPQ AX, (SP)
 	JB   emit_remainder_ok_encodeBlockAsm
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 emit_remainder_ok_encodeBlockAsm:
-	MOVQ src_len+32(FP), CX
-	MOVL 12(SP), BX
-	CMPL BX, CX
+	MOVQ src_len+32(FP), AX
+	MOVL 12(SP), DX
+	CMPL DX, AX
 	JEQ  emit_literal_done_emit_remainder_encodeBlockAsm
-	MOVL CX, SI
-	MOVL CX, 12(SP)
-	LEAQ (DX)(BX*1), CX
-	SUBL BX, SI
+	MOVL AX, SI
+	MOVL AX, 12(SP)
+	LEAQ (BX)(DX*1), AX
+	SUBL DX, SI
 	LEAL -1(SI), DX
 	CMPL DX, $0x3c
 	JB   one_byte_emit_remainder_encodeBlockAsm
@@ -1286,41 +1287,41 @@ emit_remainder_ok_encodeBlockAsm:
 	JB   three_bytes_emit_remainder_encodeBlockAsm
 	CMPL DX, $0x01000000
 	JB   four_bytes_emit_remainder_encodeBlockAsm
-	MOVB $0xfc, (AX)
-	MOVL DX, 1(AX)
-	ADDQ $0x05, AX
+	MOVB $0xfc, (CX)
+	MOVL DX, 1(CX)
+	ADDQ $0x05, CX
 	JMP  memmove_long_emit_remainder_encodeBlockAsm
 
 four_bytes_emit_remainder_encodeBlockAsm:
 	MOVL DX, BX
 	SHRL $0x10, BX
-	MOVB $0xf8, (AX)
-	MOVW DX, 1(AX)
-	MOVB BL, 3(AX)
-	ADDQ $0x04, AX
+	MOVB $0xf8, (CX)
+	MOVW DX, 1(CX)
+	MOVB BL, 3(CX)
+	ADDQ $0x04, CX
 	JMP  memmove_long_emit_remainder_encodeBlockAsm
 
 three_bytes_emit_remainder_encodeBlockAsm:
-	MOVB $0xf4, (AX)
-	MOVW DX, 1(AX)
-	ADDQ $0x03, AX
+	MOVB $0xf4, (CX)
+	MOVW DX, 1(CX)
+	ADDQ $0x03, CX
 	JMP  memmove_long_emit_remainder_encodeBlockAsm
 
 two_bytes_emit_remainder_encodeBlockAsm:
-	MOVB $0xf0, (AX)
-	MOVB DL, 1(AX)
-	ADDQ $0x02, AX
+	MOVB $0xf0, (CX)
+	MOVB DL, 1(CX)
+	ADDQ $0x02, CX
 	CMPL DX, $0x40
 	JB   memmove_emit_remainder_encodeBlockAsm
 	JMP  memmove_long_emit_remainder_encodeBlockAsm
 
 one_byte_emit_remainder_encodeBlockAsm:
 	SHLB $0x02, DL
-	MOVB DL, (AX)
-	ADDQ $0x01, AX
+	MOVB DL, (CX)
+	ADDQ $0x01, CX
 
 memmove_emit_remainder_encodeBlockAsm:
-	LEAQ (AX)(SI*1), DX
+	LEAQ (CX)(SI*1), DX
 	MOVL SI, BX
 
 	// genMemMoveShort
@@ -1336,73 +1337,73 @@ memmove_emit_remainder_encodeBlockAsm:
 	JMP  emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_33through64
 
 emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_1or2:
-	MOVB (CX), SI
-	MOVB -1(CX)(BX*1), CL
-	MOVB SI, (AX)
-	MOVB CL, -1(AX)(BX*1)
+	MOVB (AX), SI
+	MOVB -1(AX)(BX*1), AL
+	MOVB SI, (CX)
+	MOVB AL, -1(CX)(BX*1)
 	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm
 
 emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_3:
-	MOVW (CX), SI
-	MOVB 2(CX), CL
-	MOVW SI, (AX)
-	MOVB CL, 2(AX)
+	MOVW (AX), SI
+	MOVB 2(AX), AL
+	MOVW SI, (CX)
+	MOVB AL, 2(CX)
 	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm
 
 emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_4through7:
-	MOVL (CX), SI
-	MOVL -4(CX)(BX*1), CX
-	MOVL SI, (AX)
-	MOVL CX, -4(AX)(BX*1)
+	MOVL (AX), SI
+	MOVL -4(AX)(BX*1), AX
+	MOVL SI, (CX)
+	MOVL AX, -4(CX)(BX*1)
 	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm
 
 emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_8through16:
-	MOVQ (CX), SI
-	MOVQ -8(CX)(BX*1), CX
-	MOVQ SI, (AX)
-	MOVQ CX, -8(AX)(BX*1)
+	MOVQ (AX), SI
+	MOVQ -8(AX)(BX*1), AX
+	MOVQ SI, (CX)
+	MOVQ AX, -8(CX)(BX*1)
 	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm
 
 emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_17through32:
-	MOVOU (CX), X0
-	MOVOU -16(CX)(BX*1), X1
-	MOVOU X0, (AX)
-	MOVOU X1, -16(AX)(BX*1)
+	MOVOU (AX), X0
+	MOVOU -16(AX)(BX*1), X1
+	MOVOU X0, (CX)
+	MOVOU X1, -16(CX)(BX*1)
 	JMP   memmove_end_copy_emit_remainder_encodeBlockAsm
 
 emit_lit_memmove_emit_remainder_encodeBlockAsm_memmove_move_33through64:
-	MOVOU (CX), X0
-	MOVOU 16(CX), X1
-	MOVOU -32(CX)(BX*1), X2
-	MOVOU -16(CX)(BX*1), X3
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(BX*1)
-	MOVOU X3, -16(AX)(BX*1)
+	MOVOU (AX), X0
+	MOVOU 16(AX), X1
+	MOVOU -32(AX)(BX*1), X2
+	MOVOU -16(AX)(BX*1), X3
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(BX*1)
+	MOVOU X3, -16(CX)(BX*1)
 
 memmove_end_copy_emit_remainder_encodeBlockAsm:
-	MOVQ DX, AX
+	MOVQ DX, CX
 	JMP  emit_literal_done_emit_remainder_encodeBlockAsm
 
 memmove_long_emit_remainder_encodeBlockAsm:
-	LEAQ (AX)(SI*1), DX
+	LEAQ (CX)(SI*1), DX
 	MOVL SI, BX
 
 	// genMemMoveLong
-	MOVOU (CX), X0
-	MOVOU 16(CX), X1
-	MOVOU -32(CX)(BX*1), X2
-	MOVOU -16(CX)(BX*1), X3
+	MOVOU (AX), X0
+	MOVOU 16(AX), X1
+	MOVOU -32(AX)(BX*1), X2
+	MOVOU -16(AX)(BX*1), X3
 	MOVQ  BX, DI
 	SHRQ  $0x05, DI
-	MOVQ  AX, SI
+	MOVQ  CX, SI
 	ANDL  $0x0000001f, SI
 	MOVQ  $0x00000040, R8
 	SUBQ  SI, R8
 	DECQ  DI
 	JA    emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32
-	LEAQ  -32(CX)(R8*1), SI
-	LEAQ  -32(AX)(R8*1), R9
+	LEAQ  -32(AX)(R8*1), SI
+	LEAQ  -32(CX)(R8*1), R9
 
 emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_big_loop_back:
 	MOVOU (SI), X4
@@ -1416,1199 +1417,1200 @@ emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_big_loop_back:
 	JNA   emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_big_loop_back
 
 emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32:
-	MOVOU -32(CX)(R8*1), X4
-	MOVOU -16(CX)(R8*1), X5
-	MOVOA X4, -32(AX)(R8*1)
-	MOVOA X5, -16(AX)(R8*1)
+	MOVOU -32(AX)(R8*1), X4
+	MOVOU -16(AX)(R8*1), X5
+	MOVOA X4, -32(CX)(R8*1)
+	MOVOA X5, -16(CX)(R8*1)
 	ADDQ  $0x20, R8
 	CMPQ  BX, R8
 	JAE   emit_lit_memmove_long_emit_remainder_encodeBlockAsmlarge_forward_sse_loop_32
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(BX*1)
-	MOVOU X3, -16(AX)(BX*1)
-	MOVQ  DX, AX
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(BX*1)
+	MOVOU X3, -16(CX)(BX*1)
+	MOVQ  DX, CX
 
 emit_literal_done_emit_remainder_encodeBlockAsm:
-	MOVQ dst_base+0(FP), CX
-	SUBQ CX, AX
-	MOVQ AX, ret+48(FP)
+	MOVQ dst_base+0(FP), AX
+	SUBQ AX, CX
+	MOVQ CX, ret+56(FP)
 	RET
 
-// func encodeBlockAsm4MB(dst []byte, src []byte) int
+// func encodeBlockAsm4MB(dst []byte, src []byte, tmp *[65536]byte) int
 // Requires: BMI, SSE2
-TEXT ·encodeBlockAsm4MB(SB), $65560-56
-	MOVQ dst_base+0(FP), AX
-	MOVQ $0x00000200, CX
-	LEAQ 24(SP), DX
+TEXT ·encodeBlockAsm4MB(SB), $24-64
+	MOVQ tmp+48(FP), AX
+	MOVQ dst_base+0(FP), CX
+	MOVQ $0x00000200, DX
+	MOVQ AX, BX
 	PXOR X0, X0
 
 zero_loop_encodeBlockAsm4MB:
-	MOVOU X0, (DX)
-	MOVOU X0, 16(DX)
-	MOVOU X0, 32(DX)
-	MOVOU X0, 48(DX)
-	MOVOU X0, 64(DX)
-	MOVOU X0, 80(DX)
-	MOVOU X0, 96(DX)
-	MOVOU X0, 112(DX)
-	ADDQ  $0x80, DX
-	DECQ  CX
+	MOVOU X0, (BX)
+	MOVOU X0, 16(BX)
+	MOVOU X0, 32(BX)
+	MOVOU X0, 48(BX)
+	MOVOU X0, 64(BX)
+	MOVOU X0, 80(BX)
+	MOVOU X0, 96(BX)
+	MOVOU X0, 112(BX)
+	ADDQ  $0x80, BX
+	DECQ  DX
 	JNZ   zero_loop_encodeBlockAsm4MB
 	MOVL  $0x00000000, 12(SP)
-	MOVQ  src_len+32(FP), CX
-	LEAQ  -9(CX), DX
-	LEAQ  -8(CX), BX
-	MOVL  BX, 8(SP)
-	SHRQ  $0x05, CX
-	SUBL  CX, DX
-	LEAQ  (AX)(DX*1), DX
-	MOVQ  DX, (SP)
-	MOVL  $0x00000001, CX
-	MOVL  CX, 16(SP)
-	MOVQ  src_base+24(FP), DX
+	MOVQ  src_len+32(FP), DX
+	LEAQ  -9(DX), BX
+	LEAQ  -8(DX), SI
+	MOVL  SI, 8(SP)
+	SHRQ  $0x05, DX
+	SUBL  DX, BX
+	LEAQ  (CX)(BX*1), BX
+	MOVQ  BX, (SP)
+	MOVL  $0x00000001, DX
+	MOVL  DX, 16(SP)
+	MOVQ  src_base+24(FP), BX
 
 search_loop_encodeBlockAsm4MB:
-	MOVL  CX, BX
-	SUBL  12(SP), BX
-	SHRL  $0x06, BX
-	LEAL  4(CX)(BX*1), BX
-	CMPL  BX, 8(SP)
+	MOVL  DX, SI
+	SUBL  12(SP), SI
+	SHRL  $0x06, SI
+	LEAL  4(DX)(SI*1), SI
+	CMPL  SI, 8(SP)
 	JAE   emit_remainder_encodeBlockAsm4MB
-	MOVQ  (DX)(CX*1), SI
-	MOVL  BX, 20(SP)
-	MOVQ  $0x0000cf1bbcdcbf9b, R8
-	MOVQ  SI, R9
-	MOVQ  SI, R10
-	SHRQ  $0x08, R10
-	SHLQ  $0x10, R9
-	IMULQ R8, R9
-	SHRQ  $0x32, R9
+	MOVQ  (BX)(DX*1), DI
+	MOVL  SI, 20(SP)
+	MOVQ  $0x0000cf1bbcdcbf9b, R9
+	MOVQ  DI, R10
+	MOVQ  DI, R11
+	SHRQ  $0x08, R11
 	SHLQ  $0x10, R10
-	IMULQ R8, R10
+	IMULQ R9, R10
 	SHRQ  $0x32, R10
-	MOVL  24(SP)(R9*4), BX
-	MOVL  24(SP)(R10*4), DI
-	MOVL  CX, 24(SP)(R9*4)
-	LEAL  1(CX), R9
-	MOVL  R9, 24(SP)(R10*4)
-	MOVQ  SI, R9
-	SHRQ  $0x10, R9
-	SHLQ  $0x10, R9
-	IMULQ R8, R9
-	SHRQ  $0x32, R9
-	MOVL  CX, R8
-	SUBL  16(SP), R8
-	MOVL  1(DX)(R8*1), R10
-	MOVQ  SI, R8
-	SHRQ  $0x08, R8
-	CMPL  R8, R10
+	SHLQ  $0x10, R11
+	IMULQ R9, R11
+	SHRQ  $0x32, R11
+	MOVL  (AX)(R10*4), SI
+	MOVL  (AX)(R11*4), R8
+	MOVL  DX, (AX)(R10*4)
+	LEAL  1(DX), R10
+	MOVL  R10, (AX)(R11*4)
+	MOVQ  DI, R10
+	SHRQ  $0x10, R10
+	SHLQ  $0x10, R10
+	IMULQ R9, R10
+	SHRQ  $0x32, R10
+	MOVL  DX, R9
+	SUBL  16(SP), R9
+	MOVL  1(BX)(R9*1), R11
+	MOVQ  DI, R9
+	SHRQ  $0x08, R9
+	CMPL  R9, R11
 	JNE   no_repeat_found_encodeBlockAsm4MB
-	LEAL  1(CX), SI
-	MOVL  12(SP), DI
-	MOVL  SI, BX
-	SUBL  16(SP), BX
+	LEAL  1(DX), DI
+	MOVL  12(SP), R8
+	MOVL  DI, SI
+	SUBL  16(SP), SI
 	JZ    repeat_extend_back_end_encodeBlockAsm4MB
 
 repeat_extend_back_loop_encodeBlockAsm4MB:
-	CMPL SI, DI
+	CMPL DI, R8
 	JBE  repeat_extend_back_end_encodeBlockAsm4MB
-	MOVB -1(DX)(BX*1), R8
-	MOVB -1(DX)(SI*1), R9
-	CMPB R8, R9
+	MOVB -1(BX)(SI*1), R9
+	MOVB -1(BX)(DI*1), R10
+	CMPB R9, R10
 	JNE  repeat_extend_back_end_encodeBlockAsm4MB
-	LEAL -1(SI), SI
-	DECL BX
+	LEAL -1(DI), DI
+	DECL SI
 	JNZ  repeat_extend_back_loop_encodeBlockAsm4MB
 
 repeat_extend_back_end_encodeBlockAsm4MB:
-	MOVL SI, BX
-	SUBL 12(SP), BX
-	LEAQ 4(AX)(BX*1), BX
-	CMPQ BX, (SP)
+	MOVL DI, SI
+	SUBL 12(SP), SI
+	LEAQ 4(CX)(SI*1), SI
+	CMPQ SI, (SP)
 	JB   repeat_dst_size_check_encodeBlockAsm4MB
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 repeat_dst_size_check_encodeBlockAsm4MB:
-	MOVL 12(SP), BX
-	CMPL BX, SI
+	MOVL 12(SP), SI
+	CMPL SI, DI
 	JEQ  emit_literal_done_repeat_emit_encodeBlockAsm4MB
-	MOVL SI, R8
-	MOVL SI, 12(SP)
-	LEAQ (DX)(BX*1), R9
-	SUBL BX, R8
-	LEAL -1(R8), BX
-	CMPL BX, $0x3c
+	MOVL DI, R9
+	MOVL DI, 12(SP)
+	LEAQ (BX)(SI*1), R10
+	SUBL SI, R9
+	LEAL -1(R9), SI
+	CMPL SI, $0x3c
 	JB   one_byte_repeat_emit_encodeBlockAsm4MB
-	CMPL BX, $0x00000100
+	CMPL SI, $0x00000100
 	JB   two_bytes_repeat_emit_encodeBlockAsm4MB
-	CMPL BX, $0x00010000
+	CMPL SI, $0x00010000
 	JB   three_bytes_repeat_emit_encodeBlockAsm4MB
-	MOVL BX, R10
-	SHRL $0x10, R10
-	MOVB $0xf8, (AX)
-	MOVW BX, 1(AX)
-	MOVB R10, 3(AX)
-	ADDQ $0x04, AX
+	MOVL SI, R11
+	SHRL $0x10, R11
+	MOVB $0xf8, (CX)
+	MOVW SI, 1(CX)
+	MOVB R11, 3(CX)
+	ADDQ $0x04, CX
 	JMP  memmove_long_repeat_emit_encodeBlockAsm4MB
 
 three_bytes_repeat_emit_encodeBlockAsm4MB:
-	MOVB $0xf4, (AX)
-	MOVW BX, 1(AX)
-	ADDQ $0x03, AX
+	MOVB $0xf4, (CX)
+	MOVW SI, 1(CX)
+	ADDQ $0x03, CX
 	JMP  memmove_long_repeat_emit_encodeBlockAsm4MB
 
 two_bytes_repeat_emit_encodeBlockAsm4MB:
-	MOVB $0xf0, (AX)
-	MOVB BL, 1(AX)
-	ADDQ $0x02, AX
-	CMPL BX, $0x40
+	MOVB $0xf0, (CX)
+	MOVB SI, 1(CX)
+	ADDQ $0x02, CX
+	CMPL SI, $0x40
 	JB   memmove_repeat_emit_encodeBlockAsm4MB
 	JMP  memmove_long_repeat_emit_encodeBlockAsm4MB
 
 one_byte_repeat_emit_encodeBlockAsm4MB:
-	SHLB $0x02, BL
-	MOVB BL, (AX)
-	ADDQ $0x01, AX
+	SHLB $0x02, SI
+	MOVB SI, (CX)
+	ADDQ $0x01, CX
 
 memmove_repeat_emit_encodeBlockAsm4MB:
-	LEAQ (AX)(R8*1), BX
+	LEAQ (CX)(R9*1), SI
 
 	// genMemMoveShort
-	CMPQ R8, $0x08
+	CMPQ R9, $0x08
 	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8
-	CMPQ R8, $0x10
+	CMPQ R9, $0x10
 	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8through16
-	CMPQ R8, $0x20
+	CMPQ R9, $0x20
 	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_17through32
 	JMP  emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_33through64
 
 emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8:
-	MOVQ (R9), R10
-	MOVQ R10, (AX)
+	MOVQ (R10), R11
+	MOVQ R11, (CX)
 	JMP  memmove_end_copy_repeat_emit_encodeBlockAsm4MB
 
 emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_8through16:
-	MOVQ (R9), R10
-	MOVQ -8(R9)(R8*1), R9
-	MOVQ R10, (AX)
-	MOVQ R9, -8(AX)(R8*1)
+	MOVQ (R10), R11
+	MOVQ -8(R10)(R9*1), R10
+	MOVQ R11, (CX)
+	MOVQ R10, -8(CX)(R9*1)
 	JMP  memmove_end_copy_repeat_emit_encodeBlockAsm4MB
 
 emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_17through32:
-	MOVOU (R9), X0
-	MOVOU -16(R9)(R8*1), X1
-	MOVOU X0, (AX)
-	MOVOU X1, -16(AX)(R8*1)
+	MOVOU (R10), X0
+	MOVOU -16(R10)(R9*1), X1
+	MOVOU X0, (CX)
+	MOVOU X1, -16(CX)(R9*1)
 	JMP   memmove_end_copy_repeat_emit_encodeBlockAsm4MB
 
 emit_lit_memmove_repeat_emit_encodeBlockAsm4MB_memmove_move_33through64:
-	MOVOU (R9), X0
-	MOVOU 16(R9), X1
-	MOVOU -32(R9)(R8*1), X2
-	MOVOU -16(R9)(R8*1), X3
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(R8*1)
-	MOVOU X3, -16(AX)(R8*1)
+	MOVOU (R10), X0
+	MOVOU 16(R10), X1
+	MOVOU -32(R10)(R9*1), X2
+	MOVOU -16(R10)(R9*1), X3
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R9*1)
+	MOVOU X3, -16(CX)(R9*1)
 
 memmove_end_copy_repeat_emit_encodeBlockAsm4MB:
-	MOVQ BX, AX
+	MOVQ SI, CX
 	JMP  emit_literal_done_repeat_emit_encodeBlockAsm4MB
 
 memmove_long_repeat_emit_encodeBlockAsm4MB:
-	LEAQ (AX)(R8*1), BX
+	LEAQ (CX)(R9*1), SI
 
 	// genMemMoveLong
-	MOVOU (R9), X0
-	MOVOU 16(R9), X1
-	MOVOU -32(R9)(R8*1), X2
-	MOVOU -16(R9)(R8*1), X3
-	MOVQ  R8, R11
-	SHRQ  $0x05, R11
-	MOVQ  AX, R10
-	ANDL  $0x0000001f, R10
-	MOVQ  $0x00000040, R12
-	SUBQ  R10, R12
-	DECQ  R11
+	MOVOU (R10), X0
+	MOVOU 16(R10), X1
+	MOVOU -32(R10)(R9*1), X2
+	MOVOU -16(R10)(R9*1), X3
+	MOVQ  R9, R12
+	SHRQ  $0x05, R12
+	MOVQ  CX, R11
+	ANDL  $0x0000001f, R11
+	MOVQ  $0x00000040, R13
+	SUBQ  R11, R13
+	DECQ  R12
 	JA    emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32
-	LEAQ  -32(R9)(R12*1), R10
-	LEAQ  -32(AX)(R12*1), R13
+	LEAQ  -32(R10)(R13*1), R11
+	LEAQ  -32(CX)(R13*1), R14
 
 emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_big_loop_back:
-	MOVOU (R10), X4
-	MOVOU 16(R10), X5
-	MOVOA X4, (R13)
-	MOVOA X5, 16(R13)
+	MOVOU (R11), X4
+	MOVOU 16(R11), X5
+	MOVOA X4, (R14)
+	MOVOA X5, 16(R14)
+	ADDQ  $0x20, R14
+	ADDQ  $0x20, R11
 	ADDQ  $0x20, R13
-	ADDQ  $0x20, R10
-	ADDQ  $0x20, R12
-	DECQ  R11
+	DECQ  R12
 	JNA   emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_big_loop_back
 
 emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32:
-	MOVOU -32(R9)(R12*1), X4
-	MOVOU -16(R9)(R12*1), X5
-	MOVOA X4, -32(AX)(R12*1)
-	MOVOA X5, -16(AX)(R12*1)
-	ADDQ  $0x20, R12
-	CMPQ  R8, R12
+	MOVOU -32(R10)(R13*1), X4
+	MOVOU -16(R10)(R13*1), X5
+	MOVOA X4, -32(CX)(R13*1)
+	MOVOA X5, -16(CX)(R13*1)
+	ADDQ  $0x20, R13
+	CMPQ  R9, R13
 	JAE   emit_lit_memmove_long_repeat_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(R8*1)
-	MOVOU X3, -16(AX)(R8*1)
-	MOVQ  BX, AX
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R9*1)
+	MOVOU X3, -16(CX)(R9*1)
+	MOVQ  SI, CX
 
 emit_literal_done_repeat_emit_encodeBlockAsm4MB:
-	ADDL $0x05, CX
-	MOVL CX, BX
-	SUBL 16(SP), BX
-	MOVQ src_len+32(FP), R8
-	SUBL CX, R8
-	LEAQ (DX)(CX*1), R9
-	LEAQ (DX)(BX*1), BX
+	ADDL $0x05, DX
+	MOVL DX, SI
+	SUBL 16(SP), SI
+	MOVQ src_len+32(FP), R9
+	SUBL DX, R9
+	LEAQ (BX)(DX*1), R10
+	LEAQ (BX)(SI*1), SI
 
 	// matchLen
-	XORL R11, R11
+	XORL R12, R12
 
 matchlen_loopback_16_repeat_extend_encodeBlockAsm4MB:
-	CMPL R8, $0x10
+	CMPL R9, $0x10
 	JB   matchlen_match8_repeat_extend_encodeBlockAsm4MB
-	MOVQ (R9)(R11*1), R10
-	MOVQ 8(R9)(R11*1), R12
-	XORQ (BX)(R11*1), R10
+	MOVQ (R10)(R12*1), R11
+	MOVQ 8(R10)(R12*1), R13
+	XORQ (SI)(R12*1), R11
 	JNZ  matchlen_bsf_8_repeat_extend_encodeBlockAsm4MB
-	XORQ 8(BX)(R11*1), R12
+	XORQ 8(SI)(R12*1), R13
 	JNZ  matchlen_bsf_16repeat_extend_encodeBlockAsm4MB
-	LEAL -16(R8), R8
-	LEAL 16(R11), R11
+	LEAL -16(R9), R9
+	LEAL 16(R12), R12
 	JMP  matchlen_loopback_16_repeat_extend_encodeBlockAsm4MB
 
 matchlen_bsf_16repeat_extend_encodeBlockAsm4MB:
 #ifdef GOAMD64_v3
-	TZCNTQ R12, R12
+	TZCNTQ R13, R13
 
 #else
-	BSFQ R12, R12
+	BSFQ R13, R13
 
 #endif
-	SARQ $0x03, R12
-	LEAL 8(R11)(R12*1), R11
+	SARQ $0x03, R13
+	LEAL 8(R12)(R13*1), R12
 	JMP  repeat_extend_forward_end_encodeBlockAsm4MB
 
 matchlen_match8_repeat_extend_encodeBlockAsm4MB:
-	CMPL R8, $0x08
+	CMPL R9, $0x08
 	JB   matchlen_match4_repeat_extend_encodeBlockAsm4MB
-	MOVQ (R9)(R11*1), R10
-	XORQ (BX)(R11*1), R10
+	MOVQ (R10)(R12*1), R11
+	XORQ (SI)(R12*1), R11
 	JNZ  matchlen_bsf_8_repeat_extend_encodeBlockAsm4MB
-	LEAL -8(R8), R8
-	LEAL 8(R11), R11
+	LEAL -8(R9), R9
+	LEAL 8(R12), R12
 	JMP  matchlen_match4_repeat_extend_encodeBlockAsm4MB
 
 matchlen_bsf_8_repeat_extend_encodeBlockAsm4MB:
 #ifdef GOAMD64_v3
-	TZCNTQ R10, R10
+	TZCNTQ R11, R11
 
 #else
-	BSFQ R10, R10
+	BSFQ R11, R11
 
 #endif
-	SARQ $0x03, R10
-	LEAL (R11)(R10*1), R11
+	SARQ $0x03, R11
+	LEAL (R12)(R11*1), R12
 	JMP  repeat_extend_forward_end_encodeBlockAsm4MB
 
 matchlen_match4_repeat_extend_encodeBlockAsm4MB:
-	CMPL R8, $0x04
+	CMPL R9, $0x04
 	JB   matchlen_match2_repeat_extend_encodeBlockAsm4MB
-	MOVL (R9)(R11*1), R10
-	CMPL (BX)(R11*1), R10
+	MOVL (R10)(R12*1), R11
+	CMPL (SI)(R12*1), R11
 	JNE  matchlen_match2_repeat_extend_encodeBlockAsm4MB
-	LEAL -4(R8), R8
-	LEAL 4(R11), R11
+	LEAL -4(R9), R9
+	LEAL 4(R12), R12
 
 matchlen_match2_repeat_extend_encodeBlockAsm4MB:
-	CMPL R8, $0x01
+	CMPL R9, $0x01
 	JE   matchlen_match1_repeat_extend_encodeBlockAsm4MB
 	JB   repeat_extend_forward_end_encodeBlockAsm4MB
-	MOVW (R9)(R11*1), R10
-	CMPW (BX)(R11*1), R10
+	MOVW (R10)(R12*1), R11
+	CMPW (SI)(R12*1), R11
 	JNE  matchlen_match1_repeat_extend_encodeBlockAsm4MB
-	LEAL 2(R11), R11
-	SUBL $0x02, R8
+	LEAL 2(R12), R12
+	SUBL $0x02, R9
 	JZ   repeat_extend_forward_end_encodeBlockAsm4MB
 
 matchlen_match1_repeat_extend_encodeBlockAsm4MB:
-	MOVB (R9)(R11*1), R10
-	CMPB (BX)(R11*1), R10
+	MOVB (R10)(R12*1), R11
+	CMPB (SI)(R12*1), R11
 	JNE  repeat_extend_forward_end_encodeBlockAsm4MB
-	LEAL 1(R11), R11
+	LEAL 1(R12), R12
 
 repeat_extend_forward_end_encodeBlockAsm4MB:
-	ADDL  R11, CX
-	MOVL  CX, BX
-	SUBL  SI, BX
-	MOVL  16(SP), SI
-	TESTL DI, DI
+	ADDL  R12, DX
+	MOVL  DX, SI
+	SUBL  DI, SI
+	MOVL  16(SP), DI
+	TESTL R8, R8
 	JZ    repeat_as_copy_encodeBlockAsm4MB
 
 	// emitRepeat
-	MOVL BX, DI
-	LEAL -4(BX), BX
-	CMPL DI, $0x08
+	MOVL SI, R8
+	LEAL -4(SI), SI
+	CMPL R8, $0x08
 	JBE  repeat_two_match_repeat_encodeBlockAsm4MB
-	CMPL DI, $0x0c
+	CMPL R8, $0x0c
 	JAE  cant_repeat_two_offset_match_repeat_encodeBlockAsm4MB
-	CMPL SI, $0x00000800
+	CMPL DI, $0x00000800
 	JB   repeat_two_offset_match_repeat_encodeBlockAsm4MB
 
 cant_repeat_two_offset_match_repeat_encodeBlockAsm4MB:
-	CMPL BX, $0x00000104
+	CMPL SI, $0x00000104
 	JB   repeat_three_match_repeat_encodeBlockAsm4MB
-	CMPL BX, $0x00010100
+	CMPL SI, $0x00010100
 	JB   repeat_four_match_repeat_encodeBlockAsm4MB
-	LEAL -65536(BX), BX
-	MOVL BX, SI
-	MOVW $0x001d, (AX)
-	MOVW BX, 2(AX)
-	SARL $0x10, SI
-	MOVB SI, 4(AX)
-	ADDQ $0x05, AX
+	LEAL -65536(SI), SI
+	MOVL SI, DI
+	MOVW $0x001d, (CX)
+	MOVW SI, 2(CX)
+	SARL $0x10, DI
+	MOVB DI, 4(CX)
+	ADDQ $0x05, CX
 	JMP  repeat_end_emit_encodeBlockAsm4MB
 
 repeat_four_match_repeat_encodeBlockAsm4MB:
-	LEAL -256(BX), BX
-	MOVW $0x0019, (AX)
-	MOVW BX, 2(AX)
-	ADDQ $0x04, AX
+	LEAL -256(SI), SI
+	MOVW $0x0019, (CX)
+	MOVW SI, 2(CX)
+	ADDQ $0x04, CX
 	JMP  repeat_end_emit_encodeBlockAsm4MB
 
 repeat_three_match_repeat_encodeBlockAsm4MB:
-	LEAL -4(BX), BX
-	MOVW $0x0015, (AX)
-	MOVB BL, 2(AX)
-	ADDQ $0x03, AX
+	LEAL -4(SI), SI
+	MOVW $0x0015, (CX)
+	MOVB SI, 2(CX)
+	ADDQ $0x03, CX
 	JMP  repeat_end_emit_encodeBlockAsm4MB
 
 repeat_two_match_repeat_encodeBlockAsm4MB:
-	SHLL $0x02, BX
-	ORL  $0x01, BX
-	MOVW BX, (AX)
-	ADDQ $0x02, AX
+	SHLL $0x02, SI
+	ORL  $0x01, SI
+	MOVW SI, (CX)
+	ADDQ $0x02, CX
 	JMP  repeat_end_emit_encodeBlockAsm4MB
 
 repeat_two_offset_match_repeat_encodeBlockAsm4MB:
-	XORQ DI, DI
-	LEAL 1(DI)(BX*4), BX
-	MOVB SI, 1(AX)
-	SARL $0x08, SI
-	SHLL $0x05, SI
-	ORL  SI, BX
-	MOVB BL, (AX)
-	ADDQ $0x02, AX
+	XORQ R8, R8
+	LEAL 1(R8)(SI*4), SI
+	MOVB DI, 1(CX)
+	SARL $0x08, DI
+	SHLL $0x05, DI
+	ORL  DI, SI
+	MOVB SI, (CX)
+	ADDQ $0x02, CX
 	JMP  repeat_end_emit_encodeBlockAsm4MB
 
 repeat_as_copy_encodeBlockAsm4MB:
 	// emitCopy
-	CMPL SI, $0x00010000
+	CMPL DI, $0x00010000
 	JB   two_byte_offset_repeat_as_copy_encodeBlockAsm4MB
-	CMPL BX, $0x40
+	CMPL SI, $0x40
 	JBE  four_bytes_remain_repeat_as_copy_encodeBlockAsm4MB
-	MOVB $0xff, (AX)
-	MOVL SI, 1(AX)
-	LEAL -64(BX), BX
-	ADDQ $0x05, AX
-	CMPL BX, $0x04
+	MOVB $0xff, (CX)
+	MOVL DI, 1(CX)
+	LEAL -64(SI), SI
+	ADDQ $0x05, CX
+	CMPL SI, $0x04
 	JB   four_bytes_remain_repeat_as_copy_encodeBlockAsm4MB
 
 	// emitRepeat
-	MOVL BX, DI
-	LEAL -4(BX), BX
-	CMPL DI, $0x08
+	MOVL SI, R8
+	LEAL -4(SI), SI
+	CMPL R8, $0x08
 	JBE  repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy
-	CMPL DI, $0x0c
+	CMPL R8, $0x0c
 	JAE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy
-	CMPL SI, $0x00000800
+	CMPL DI, $0x00000800
 	JB   repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy
 
 cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy:
-	CMPL BX, $0x00000104
+	CMPL SI, $0x00000104
 	JB   repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy
-	CMPL BX, $0x00010100
+	CMPL SI, $0x00010100
 	JB   repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy
-	LEAL -65536(BX), BX
-	MOVL BX, SI
-	MOVW $0x001d, (AX)
-	MOVW BX, 2(AX)
-	SARL $0x10, SI
-	MOVB SI, 4(AX)
-	ADDQ $0x05, AX
+	LEAL -65536(SI), SI
+	MOVL SI, DI
+	MOVW $0x001d, (CX)
+	MOVW SI, 2(CX)
+	SARL $0x10, DI
+	MOVB DI, 4(CX)
+	ADDQ $0x05, CX
 	JMP  repeat_end_emit_encodeBlockAsm4MB
 
 repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy:
-	LEAL -256(BX), BX
-	MOVW $0x0019, (AX)
-	MOVW BX, 2(AX)
-	ADDQ $0x04, AX
+	LEAL -256(SI), SI
+	MOVW $0x0019, (CX)
+	MOVW SI, 2(CX)
+	ADDQ $0x04, CX
 	JMP  repeat_end_emit_encodeBlockAsm4MB
 
 repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy:
-	LEAL -4(BX), BX
-	MOVW $0x0015, (AX)
-	MOVB BL, 2(AX)
-	ADDQ $0x03, AX
+	LEAL -4(SI), SI
+	MOVW $0x0015, (CX)
+	MOVB SI, 2(CX)
+	ADDQ $0x03, CX
 	JMP  repeat_end_emit_encodeBlockAsm4MB
 
 repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy:
-	SHLL $0x02, BX
-	ORL  $0x01, BX
-	MOVW BX, (AX)
-	ADDQ $0x02, AX
+	SHLL $0x02, SI
+	ORL  $0x01, SI
+	MOVW SI, (CX)
+	ADDQ $0x02, CX
 	JMP  repeat_end_emit_encodeBlockAsm4MB
 
 repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy:
-	XORQ DI, DI
-	LEAL 1(DI)(BX*4), BX
-	MOVB SI, 1(AX)
-	SARL $0x08, SI
-	SHLL $0x05, SI
-	ORL  SI, BX
-	MOVB BL, (AX)
-	ADDQ $0x02, AX
+	XORQ R8, R8
+	LEAL 1(R8)(SI*4), SI
+	MOVB DI, 1(CX)
+	SARL $0x08, DI
+	SHLL $0x05, DI
+	ORL  DI, SI
+	MOVB SI, (CX)
+	ADDQ $0x02, CX
 	JMP  repeat_end_emit_encodeBlockAsm4MB
 
 four_bytes_remain_repeat_as_copy_encodeBlockAsm4MB:
-	TESTL BX, BX
+	TESTL SI, SI
 	JZ    repeat_end_emit_encodeBlockAsm4MB
-	XORL  DI, DI
-	LEAL  -1(DI)(BX*4), BX
-	MOVB  BL, (AX)
-	MOVL  SI, 1(AX)
-	ADDQ  $0x05, AX
+	XORL  R8, R8
+	LEAL  -1(R8)(SI*4), SI
+	MOVB  SI, (CX)
+	MOVL  DI, 1(CX)
+	ADDQ  $0x05, CX
 	JMP   repeat_end_emit_encodeBlockAsm4MB
 
 two_byte_offset_repeat_as_copy_encodeBlockAsm4MB:
-	CMPL BX, $0x40
+	CMPL SI, $0x40
 	JBE  two_byte_offset_short_repeat_as_copy_encodeBlockAsm4MB
-	CMPL SI, $0x00000800
+	CMPL DI, $0x00000800
 	JAE  long_offset_short_repeat_as_copy_encodeBlockAsm4MB
-	MOVL $0x00000001, DI
-	LEAL 16(DI), DI
-	MOVB SI, 1(AX)
-	SHRL $0x08, SI
-	SHLL $0x05, SI
-	ORL  SI, DI
-	MOVB DI, (AX)
-	ADDQ $0x02, AX
-	SUBL $0x08, BX
+	MOVL $0x00000001, R8
+	LEAL 16(R8), R8
+	MOVB DI, 1(CX)
+	SHRL $0x08, DI
+	SHLL $0x05, DI
+	ORL  DI, R8
+	MOVB R8, (CX)
+	ADDQ $0x02, CX
+	SUBL $0x08, SI
 
 	// emitRepeat
-	LEAL -4(BX), BX
+	LEAL -4(SI), SI
 	JMP  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b
-	MOVL BX, DI
-	LEAL -4(BX), BX
-	CMPL DI, $0x08
+	MOVL SI, R8
+	LEAL -4(SI), SI
+	CMPL R8, $0x08
 	JBE  repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b
-	CMPL DI, $0x0c
+	CMPL R8, $0x0c
 	JAE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b
-	CMPL SI, $0x00000800
+	CMPL DI, $0x00000800
 	JB   repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b
 
 cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b:
-	CMPL BX, $0x00000104
+	CMPL SI, $0x00000104
 	JB   repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b
-	CMPL BX, $0x00010100
+	CMPL SI, $0x00010100
 	JB   repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b
-	LEAL -65536(BX), BX
-	MOVL BX, SI
-	MOVW $0x001d, (AX)
-	MOVW BX, 2(AX)
-	SARL $0x10, SI
-	MOVB SI, 4(AX)
-	ADDQ $0x05, AX
+	LEAL -65536(SI), SI
+	MOVL SI, DI
+	MOVW $0x001d, (CX)
+	MOVW SI, 2(CX)
+	SARL $0x10, DI
+	MOVB DI, 4(CX)
+	ADDQ $0x05, CX
 	JMP  repeat_end_emit_encodeBlockAsm4MB
 
 repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b:
-	LEAL -256(BX), BX
-	MOVW $0x0019, (AX)
-	MOVW BX, 2(AX)
-	ADDQ $0x04, AX
+	LEAL -256(SI), SI
+	MOVW $0x0019, (CX)
+	MOVW SI, 2(CX)
+	ADDQ $0x04, CX
 	JMP  repeat_end_emit_encodeBlockAsm4MB
 
 repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b:
-	LEAL -4(BX), BX
-	MOVW $0x0015, (AX)
-	MOVB BL, 2(AX)
-	ADDQ $0x03, AX
+	LEAL -4(SI), SI
+	MOVW $0x0015, (CX)
+	MOVB SI, 2(CX)
+	ADDQ $0x03, CX
 	JMP  repeat_end_emit_encodeBlockAsm4MB
 
 repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b:
-	SHLL $0x02, BX
-	ORL  $0x01, BX
-	MOVW BX, (AX)
-	ADDQ $0x02, AX
+	SHLL $0x02, SI
+	ORL  $0x01, SI
+	MOVW SI, (CX)
+	ADDQ $0x02, CX
 	JMP  repeat_end_emit_encodeBlockAsm4MB
 
 repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short_2b:
-	XORQ DI, DI
-	LEAL 1(DI)(BX*4), BX
-	MOVB SI, 1(AX)
-	SARL $0x08, SI
-	SHLL $0x05, SI
-	ORL  SI, BX
-	MOVB BL, (AX)
-	ADDQ $0x02, AX
+	XORQ R8, R8
+	LEAL 1(R8)(SI*4), SI
+	MOVB DI, 1(CX)
+	SARL $0x08, DI
+	SHLL $0x05, DI
+	ORL  DI, SI
+	MOVB SI, (CX)
+	ADDQ $0x02, CX
 	JMP  repeat_end_emit_encodeBlockAsm4MB
 
 long_offset_short_repeat_as_copy_encodeBlockAsm4MB:
-	MOVB $0xee, (AX)
-	MOVW SI, 1(AX)
-	LEAL -60(BX), BX
-	ADDQ $0x03, AX
+	MOVB $0xee, (CX)
+	MOVW DI, 1(CX)
+	LEAL -60(SI), SI
+	ADDQ $0x03, CX
 
 	// emitRepeat
-	MOVL BX, DI
-	LEAL -4(BX), BX
-	CMPL DI, $0x08
+	MOVL SI, R8
+	LEAL -4(SI), SI
+	CMPL R8, $0x08
 	JBE  repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short
-	CMPL DI, $0x0c
+	CMPL R8, $0x0c
 	JAE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short
-	CMPL SI, $0x00000800
+	CMPL DI, $0x00000800
 	JB   repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short
 
 cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short:
-	CMPL BX, $0x00000104
+	CMPL SI, $0x00000104
 	JB   repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short
-	CMPL BX, $0x00010100
+	CMPL SI, $0x00010100
 	JB   repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short
-	LEAL -65536(BX), BX
-	MOVL BX, SI
-	MOVW $0x001d, (AX)
-	MOVW BX, 2(AX)
-	SARL $0x10, SI
-	MOVB SI, 4(AX)
-	ADDQ $0x05, AX
+	LEAL -65536(SI), SI
+	MOVL SI, DI
+	MOVW $0x001d, (CX)
+	MOVW SI, 2(CX)
+	SARL $0x10, DI
+	MOVB DI, 4(CX)
+	ADDQ $0x05, CX
 	JMP  repeat_end_emit_encodeBlockAsm4MB
 
 repeat_four_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short:
-	LEAL -256(BX), BX
-	MOVW $0x0019, (AX)
-	MOVW BX, 2(AX)
-	ADDQ $0x04, AX
+	LEAL -256(SI), SI
+	MOVW $0x0019, (CX)
+	MOVW SI, 2(CX)
+	ADDQ $0x04, CX
 	JMP  repeat_end_emit_encodeBlockAsm4MB
 
 repeat_three_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short:
-	LEAL -4(BX), BX
-	MOVW $0x0015, (AX)
-	MOVB BL, 2(AX)
-	ADDQ $0x03, AX
+	LEAL -4(SI), SI
+	MOVW $0x0015, (CX)
+	MOVB SI, 2(CX)
+	ADDQ $0x03, CX
 	JMP  repeat_end_emit_encodeBlockAsm4MB
 
 repeat_two_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short:
-	SHLL $0x02, BX
-	ORL  $0x01, BX
-	MOVW BX, (AX)
-	ADDQ $0x02, AX
+	SHLL $0x02, SI
+	ORL  $0x01, SI
+	MOVW SI, (CX)
+	ADDQ $0x02, CX
 	JMP  repeat_end_emit_encodeBlockAsm4MB
 
 repeat_two_offset_repeat_as_copy_encodeBlockAsm4MB_emit_copy_short:
-	XORQ DI, DI
-	LEAL 1(DI)(BX*4), BX
-	MOVB SI, 1(AX)
-	SARL $0x08, SI
-	SHLL $0x05, SI
-	ORL  SI, BX
-	MOVB BL, (AX)
-	ADDQ $0x02, AX
+	XORQ R8, R8
+	LEAL 1(R8)(SI*4), SI
+	MOVB DI, 1(CX)
+	SARL $0x08, DI
+	SHLL $0x05, DI
+	ORL  DI, SI
+	MOVB SI, (CX)
+	ADDQ $0x02, CX
 	JMP  repeat_end_emit_encodeBlockAsm4MB
 
 two_byte_offset_short_repeat_as_copy_encodeBlockAsm4MB:
-	MOVL BX, DI
-	SHLL $0x02, DI
-	CMPL BX, $0x0c
+	MOVL SI, R8
+	SHLL $0x02, R8
+	CMPL SI, $0x0c
 	JAE  emit_copy_three_repeat_as_copy_encodeBlockAsm4MB
-	CMPL SI, $0x00000800
+	CMPL DI, $0x00000800
 	JAE  emit_copy_three_repeat_as_copy_encodeBlockAsm4MB
-	LEAL -15(DI), DI
-	MOVB SI, 1(AX)
-	SHRL $0x08, SI
-	SHLL $0x05, SI
-	ORL  SI, DI
-	MOVB DI, (AX)
-	ADDQ $0x02, AX
+	LEAL -15(R8), R8
+	MOVB DI, 1(CX)
+	SHRL $0x08, DI
+	SHLL $0x05, DI
+	ORL  DI, R8
+	MOVB R8, (CX)
+	ADDQ $0x02, CX
 	JMP  repeat_end_emit_encodeBlockAsm4MB
 
 emit_copy_three_repeat_as_copy_encodeBlockAsm4MB:
-	LEAL -2(DI), DI
-	MOVB DI, (AX)
-	MOVW SI, 1(AX)
-	ADDQ $0x03, AX
+	LEAL -2(R8), R8
+	MOVB R8, (CX)
+	MOVW DI, 1(CX)
+	ADDQ $0x03, CX
 
 repeat_end_emit_encodeBlockAsm4MB:
-	MOVL CX, 12(SP)
+	MOVL DX, 12(SP)
 	JMP  search_loop_encodeBlockAsm4MB
 
 no_repeat_found_encodeBlockAsm4MB:
-	CMPL (DX)(BX*1), SI
+	CMPL (BX)(SI*1), DI
 	JEQ  candidate_match_encodeBlockAsm4MB
-	SHRQ $0x08, SI
-	MOVL 24(SP)(R9*4), BX
-	LEAL 2(CX), R8
-	CMPL (DX)(DI*1), SI
+	SHRQ $0x08, DI
+	MOVL (AX)(R10*4), SI
+	LEAL 2(DX), R9
+	CMPL (BX)(R8*1), DI
 	JEQ  candidate2_match_encodeBlockAsm4MB
-	MOVL R8, 24(SP)(R9*4)
-	SHRQ $0x08, SI
-	CMPL (DX)(BX*1), SI
+	MOVL R9, (AX)(R10*4)
+	SHRQ $0x08, DI
+	CMPL (BX)(SI*1), DI
 	JEQ  candidate3_match_encodeBlockAsm4MB
-	MOVL 20(SP), CX
+	MOVL 20(SP), DX
 	JMP  search_loop_encodeBlockAsm4MB
 
 candidate3_match_encodeBlockAsm4MB:
-	ADDL $0x02, CX
+	ADDL $0x02, DX
 	JMP  candidate_match_encodeBlockAsm4MB
 
 candidate2_match_encodeBlockAsm4MB:
-	MOVL R8, 24(SP)(R9*4)
-	INCL CX
-	MOVL DI, BX
+	MOVL R9, (AX)(R10*4)
+	INCL DX
+	MOVL R8, SI
 
 candidate_match_encodeBlockAsm4MB:
-	MOVL  12(SP), SI
-	TESTL BX, BX
+	MOVL  12(SP), DI
+	TESTL SI, SI
 	JZ    match_extend_back_end_encodeBlockAsm4MB
 
 match_extend_back_loop_encodeBlockAsm4MB:
-	CMPL CX, SI
+	CMPL DX, DI
 	JBE  match_extend_back_end_encodeBlockAsm4MB
-	MOVB -1(DX)(BX*1), DI
-	MOVB -1(DX)(CX*1), R8
-	CMPB DI, R8
+	MOVB -1(BX)(SI*1), R8
+	MOVB -1(BX)(DX*1), R9
+	CMPB R8, R9
 	JNE  match_extend_back_end_encodeBlockAsm4MB
-	LEAL -1(CX), CX
-	DECL BX
+	LEAL -1(DX), DX
+	DECL SI
 	JZ   match_extend_back_end_encodeBlockAsm4MB
 	JMP  match_extend_back_loop_encodeBlockAsm4MB
 
 match_extend_back_end_encodeBlockAsm4MB:
-	MOVL CX, SI
-	SUBL 12(SP), SI
-	LEAQ 4(AX)(SI*1), SI
-	CMPQ SI, (SP)
+	MOVL DX, DI
+	SUBL 12(SP), DI
+	LEAQ 4(CX)(DI*1), DI
+	CMPQ DI, (SP)
 	JB   match_dst_size_check_encodeBlockAsm4MB
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 match_dst_size_check_encodeBlockAsm4MB:
-	MOVL CX, SI
-	MOVL 12(SP), DI
-	CMPL DI, SI
+	MOVL DX, DI
+	MOVL 12(SP), R8
+	CMPL R8, DI
 	JEQ  emit_literal_done_match_emit_encodeBlockAsm4MB
-	MOVL SI, R8
-	MOVL SI, 12(SP)
-	LEAQ (DX)(DI*1), SI
-	SUBL DI, R8
-	LEAL -1(R8), DI
-	CMPL DI, $0x3c
+	MOVL DI, R9
+	MOVL DI, 12(SP)
+	LEAQ (BX)(R8*1), DI
+	SUBL R8, R9
+	LEAL -1(R9), R8
+	CMPL R8, $0x3c
 	JB   one_byte_match_emit_encodeBlockAsm4MB
-	CMPL DI, $0x00000100
+	CMPL R8, $0x00000100
 	JB   two_bytes_match_emit_encodeBlockAsm4MB
-	CMPL DI, $0x00010000
+	CMPL R8, $0x00010000
 	JB   three_bytes_match_emit_encodeBlockAsm4MB
-	MOVL DI, R9
-	SHRL $0x10, R9
-	MOVB $0xf8, (AX)
-	MOVW DI, 1(AX)
-	MOVB R9, 3(AX)
-	ADDQ $0x04, AX
+	MOVL R8, R10
+	SHRL $0x10, R10
+	MOVB $0xf8, (CX)
+	MOVW R8, 1(CX)
+	MOVB R10, 3(CX)
+	ADDQ $0x04, CX
 	JMP  memmove_long_match_emit_encodeBlockAsm4MB
 
 three_bytes_match_emit_encodeBlockAsm4MB:
-	MOVB $0xf4, (AX)
-	MOVW DI, 1(AX)
-	ADDQ $0x03, AX
+	MOVB $0xf4, (CX)
+	MOVW R8, 1(CX)
+	ADDQ $0x03, CX
 	JMP  memmove_long_match_emit_encodeBlockAsm4MB
 
 two_bytes_match_emit_encodeBlockAsm4MB:
-	MOVB $0xf0, (AX)
-	MOVB DI, 1(AX)
-	ADDQ $0x02, AX
-	CMPL DI, $0x40
+	MOVB $0xf0, (CX)
+	MOVB R8, 1(CX)
+	ADDQ $0x02, CX
+	CMPL R8, $0x40
 	JB   memmove_match_emit_encodeBlockAsm4MB
 	JMP  memmove_long_match_emit_encodeBlockAsm4MB
 
 one_byte_match_emit_encodeBlockAsm4MB:
-	SHLB $0x02, DI
-	MOVB DI, (AX)
-	ADDQ $0x01, AX
+	SHLB $0x02, R8
+	MOVB R8, (CX)
+	ADDQ $0x01, CX
 
 memmove_match_emit_encodeBlockAsm4MB:
-	LEAQ (AX)(R8*1), DI
+	LEAQ (CX)(R9*1), R8
 
 	// genMemMoveShort
-	CMPQ R8, $0x08
+	CMPQ R9, $0x08
 	JBE  emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8
-	CMPQ R8, $0x10
+	CMPQ R9, $0x10
 	JBE  emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8through16
-	CMPQ R8, $0x20
+	CMPQ R9, $0x20
 	JBE  emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_17through32
 	JMP  emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_33through64
 
 emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8:
-	MOVQ (SI), R9
-	MOVQ R9, (AX)
+	MOVQ (DI), R10
+	MOVQ R10, (CX)
 	JMP  memmove_end_copy_match_emit_encodeBlockAsm4MB
 
 emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_8through16:
-	MOVQ (SI), R9
-	MOVQ -8(SI)(R8*1), SI
-	MOVQ R9, (AX)
-	MOVQ SI, -8(AX)(R8*1)
+	MOVQ (DI), R10
+	MOVQ -8(DI)(R9*1), DI
+	MOVQ R10, (CX)
+	MOVQ DI, -8(CX)(R9*1)
 	JMP  memmove_end_copy_match_emit_encodeBlockAsm4MB
 
 emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_17through32:
-	MOVOU (SI), X0
-	MOVOU -16(SI)(R8*1), X1
-	MOVOU X0, (AX)
-	MOVOU X1, -16(AX)(R8*1)
+	MOVOU (DI), X0
+	MOVOU -16(DI)(R9*1), X1
+	MOVOU X0, (CX)
+	MOVOU X1, -16(CX)(R9*1)
 	JMP   memmove_end_copy_match_emit_encodeBlockAsm4MB
 
 emit_lit_memmove_match_emit_encodeBlockAsm4MB_memmove_move_33through64:
-	MOVOU (SI), X0
-	MOVOU 16(SI), X1
-	MOVOU -32(SI)(R8*1), X2
-	MOVOU -16(SI)(R8*1), X3
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(R8*1)
-	MOVOU X3, -16(AX)(R8*1)
+	MOVOU (DI), X0
+	MOVOU 16(DI), X1
+	MOVOU -32(DI)(R9*1), X2
+	MOVOU -16(DI)(R9*1), X3
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R9*1)
+	MOVOU X3, -16(CX)(R9*1)
 
 memmove_end_copy_match_emit_encodeBlockAsm4MB:
-	MOVQ DI, AX
+	MOVQ R8, CX
 	JMP  emit_literal_done_match_emit_encodeBlockAsm4MB
 
 memmove_long_match_emit_encodeBlockAsm4MB:
-	LEAQ (AX)(R8*1), DI
+	LEAQ (CX)(R9*1), R8
 
 	// genMemMoveLong
-	MOVOU (SI), X0
-	MOVOU 16(SI), X1
-	MOVOU -32(SI)(R8*1), X2
-	MOVOU -16(SI)(R8*1), X3
-	MOVQ  R8, R10
-	SHRQ  $0x05, R10
-	MOVQ  AX, R9
-	ANDL  $0x0000001f, R9
-	MOVQ  $0x00000040, R11
-	SUBQ  R9, R11
-	DECQ  R10
+	MOVOU (DI), X0
+	MOVOU 16(DI), X1
+	MOVOU -32(DI)(R9*1), X2
+	MOVOU -16(DI)(R9*1), X3
+	MOVQ  R9, R11
+	SHRQ  $0x05, R11
+	MOVQ  CX, R10
+	ANDL  $0x0000001f, R10
+	MOVQ  $0x00000040, R12
+	SUBQ  R10, R12
+	DECQ  R11
 	JA    emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32
-	LEAQ  -32(SI)(R11*1), R9
-	LEAQ  -32(AX)(R11*1), R12
+	LEAQ  -32(DI)(R12*1), R10
+	LEAQ  -32(CX)(R12*1), R13
 
 emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_big_loop_back:
-	MOVOU (R9), X4
-	MOVOU 16(R9), X5
-	MOVOA X4, (R12)
-	MOVOA X5, 16(R12)
+	MOVOU (R10), X4
+	MOVOU 16(R10), X5
+	MOVOA X4, (R13)
+	MOVOA X5, 16(R13)
+	ADDQ  $0x20, R13
+	ADDQ  $0x20, R10
 	ADDQ  $0x20, R12
-	ADDQ  $0x20, R9
-	ADDQ  $0x20, R11
-	DECQ  R10
+	DECQ  R11
 	JNA   emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_big_loop_back
 
 emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32:
-	MOVOU -32(SI)(R11*1), X4
-	MOVOU -16(SI)(R11*1), X5
-	MOVOA X4, -32(AX)(R11*1)
-	MOVOA X5, -16(AX)(R11*1)
-	ADDQ  $0x20, R11
-	CMPQ  R8, R11
+	MOVOU -32(DI)(R12*1), X4
+	MOVOU -16(DI)(R12*1), X5
+	MOVOA X4, -32(CX)(R12*1)
+	MOVOA X5, -16(CX)(R12*1)
+	ADDQ  $0x20, R12
+	CMPQ  R9, R12
 	JAE   emit_lit_memmove_long_match_emit_encodeBlockAsm4MBlarge_forward_sse_loop_32
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(R8*1)
-	MOVOU X3, -16(AX)(R8*1)
-	MOVQ  DI, AX
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R9*1)
+	MOVOU X3, -16(CX)(R9*1)
+	MOVQ  R8, CX
 
 emit_literal_done_match_emit_encodeBlockAsm4MB:
 match_nolit_loop_encodeBlockAsm4MB:
-	MOVL CX, SI
-	SUBL BX, SI
-	MOVL SI, 16(SP)
-	ADDL $0x04, CX
-	ADDL $0x04, BX
-	MOVQ src_len+32(FP), SI
-	SUBL CX, SI
-	LEAQ (DX)(CX*1), DI
-	LEAQ (DX)(BX*1), BX
+	MOVL DX, DI
+	SUBL SI, DI
+	MOVL DI, 16(SP)
+	ADDL $0x04, DX
+	ADDL $0x04, SI
+	MOVQ src_len+32(FP), DI
+	SUBL DX, DI
+	LEAQ (BX)(DX*1), R8
+	LEAQ (BX)(SI*1), SI
 
 	// matchLen
-	XORL R9, R9
+	XORL R10, R10
 
 matchlen_loopback_16_match_nolit_encodeBlockAsm4MB:
-	CMPL SI, $0x10
+	CMPL DI, $0x10
 	JB   matchlen_match8_match_nolit_encodeBlockAsm4MB
-	MOVQ (DI)(R9*1), R8
-	MOVQ 8(DI)(R9*1), R10
-	XORQ (BX)(R9*1), R8
+	MOVQ (R8)(R10*1), R9
+	MOVQ 8(R8)(R10*1), R11
+	XORQ (SI)(R10*1), R9
 	JNZ  matchlen_bsf_8_match_nolit_encodeBlockAsm4MB
-	XORQ 8(BX)(R9*1), R10
+	XORQ 8(SI)(R10*1), R11
 	JNZ  matchlen_bsf_16match_nolit_encodeBlockAsm4MB
-	LEAL -16(SI), SI
-	LEAL 16(R9), R9
+	LEAL -16(DI), DI
+	LEAL 16(R10), R10
 	JMP  matchlen_loopback_16_match_nolit_encodeBlockAsm4MB
 
 matchlen_bsf_16match_nolit_encodeBlockAsm4MB:
 #ifdef GOAMD64_v3
-	TZCNTQ R10, R10
+	TZCNTQ R11, R11
 
 #else
-	BSFQ R10, R10
+	BSFQ R11, R11
 
 #endif
-	SARQ $0x03, R10
-	LEAL 8(R9)(R10*1), R9
+	SARQ $0x03, R11
+	LEAL 8(R10)(R11*1), R10
 	JMP  match_nolit_end_encodeBlockAsm4MB
 
 matchlen_match8_match_nolit_encodeBlockAsm4MB:
-	CMPL SI, $0x08
+	CMPL DI, $0x08
 	JB   matchlen_match4_match_nolit_encodeBlockAsm4MB
-	MOVQ (DI)(R9*1), R8
-	XORQ (BX)(R9*1), R8
+	MOVQ (R8)(R10*1), R9
+	XORQ (SI)(R10*1), R9
 	JNZ  matchlen_bsf_8_match_nolit_encodeBlockAsm4MB
-	LEAL -8(SI), SI
-	LEAL 8(R9), R9
+	LEAL -8(DI), DI
+	LEAL 8(R10), R10
 	JMP  matchlen_match4_match_nolit_encodeBlockAsm4MB
 
 matchlen_bsf_8_match_nolit_encodeBlockAsm4MB:
 #ifdef GOAMD64_v3
-	TZCNTQ R8, R8
+	TZCNTQ R9, R9
 
 #else
-	BSFQ R8, R8
+	BSFQ R9, R9
 
 #endif
-	SARQ $0x03, R8
-	LEAL (R9)(R8*1), R9
+	SARQ $0x03, R9
+	LEAL (R10)(R9*1), R10
 	JMP  match_nolit_end_encodeBlockAsm4MB
 
 matchlen_match4_match_nolit_encodeBlockAsm4MB:
-	CMPL SI, $0x04
+	CMPL DI, $0x04
 	JB   matchlen_match2_match_nolit_encodeBlockAsm4MB
-	MOVL (DI)(R9*1), R8
-	CMPL (BX)(R9*1), R8
+	MOVL (R8)(R10*1), R9
+	CMPL (SI)(R10*1), R9
 	JNE  matchlen_match2_match_nolit_encodeBlockAsm4MB
-	LEAL -4(SI), SI
-	LEAL 4(R9), R9
+	LEAL -4(DI), DI
+	LEAL 4(R10), R10
 
 matchlen_match2_match_nolit_encodeBlockAsm4MB:
-	CMPL SI, $0x01
+	CMPL DI, $0x01
 	JE   matchlen_match1_match_nolit_encodeBlockAsm4MB
 	JB   match_nolit_end_encodeBlockAsm4MB
-	MOVW (DI)(R9*1), R8
-	CMPW (BX)(R9*1), R8
+	MOVW (R8)(R10*1), R9
+	CMPW (SI)(R10*1), R9
 	JNE  matchlen_match1_match_nolit_encodeBlockAsm4MB
-	LEAL 2(R9), R9
-	SUBL $0x02, SI
+	LEAL 2(R10), R10
+	SUBL $0x02, DI
 	JZ   match_nolit_end_encodeBlockAsm4MB
 
 matchlen_match1_match_nolit_encodeBlockAsm4MB:
-	MOVB (DI)(R9*1), R8
-	CMPB (BX)(R9*1), R8
+	MOVB (R8)(R10*1), R9
+	CMPB (SI)(R10*1), R9
 	JNE  match_nolit_end_encodeBlockAsm4MB
-	LEAL 1(R9), R9
+	LEAL 1(R10), R10
 
 match_nolit_end_encodeBlockAsm4MB:
-	ADDL R9, CX
-	MOVL 16(SP), BX
-	ADDL $0x04, R9
-	MOVL CX, 12(SP)
+	ADDL R10, DX
+	MOVL 16(SP), SI
+	ADDL $0x04, R10
+	MOVL DX, 12(SP)
 
 	// emitCopy
-	CMPL BX, $0x00010000
+	CMPL SI, $0x00010000
 	JB   two_byte_offset_match_nolit_encodeBlockAsm4MB
-	CMPL R9, $0x40
+	CMPL R10, $0x40
 	JBE  four_bytes_remain_match_nolit_encodeBlockAsm4MB
-	MOVB $0xff, (AX)
-	MOVL BX, 1(AX)
-	LEAL -64(R9), R9
-	ADDQ $0x05, AX
-	CMPL R9, $0x04
+	MOVB $0xff, (CX)
+	MOVL SI, 1(CX)
+	LEAL -64(R10), R10
+	ADDQ $0x05, CX
+	CMPL R10, $0x04
 	JB   four_bytes_remain_match_nolit_encodeBlockAsm4MB
 
 	// emitRepeat
-	MOVL R9, SI
-	LEAL -4(R9), R9
-	CMPL SI, $0x08
+	MOVL R10, DI
+	LEAL -4(R10), R10
+	CMPL DI, $0x08
 	JBE  repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy
-	CMPL SI, $0x0c
+	CMPL DI, $0x0c
 	JAE  cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy
-	CMPL BX, $0x00000800
+	CMPL SI, $0x00000800
 	JB   repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy
 
 cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy:
-	CMPL R9, $0x00000104
+	CMPL R10, $0x00000104
 	JB   repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy
-	CMPL R9, $0x00010100
+	CMPL R10, $0x00010100
 	JB   repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy
-	LEAL -65536(R9), R9
-	MOVL R9, BX
-	MOVW $0x001d, (AX)
-	MOVW R9, 2(AX)
-	SARL $0x10, BX
-	MOVB BL, 4(AX)
-	ADDQ $0x05, AX
+	LEAL -65536(R10), R10
+	MOVL R10, SI
+	MOVW $0x001d, (CX)
+	MOVW R10, 2(CX)
+	SARL $0x10, SI
+	MOVB SI, 4(CX)
+	ADDQ $0x05, CX
 	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
 
 repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy:
-	LEAL -256(R9), R9
-	MOVW $0x0019, (AX)
-	MOVW R9, 2(AX)
-	ADDQ $0x04, AX
+	LEAL -256(R10), R10
+	MOVW $0x0019, (CX)
+	MOVW R10, 2(CX)
+	ADDQ $0x04, CX
 	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
 
 repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy:
-	LEAL -4(R9), R9
-	MOVW $0x0015, (AX)
-	MOVB R9, 2(AX)
-	ADDQ $0x03, AX
+	LEAL -4(R10), R10
+	MOVW $0x0015, (CX)
+	MOVB R10, 2(CX)
+	ADDQ $0x03, CX
 	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
 
 repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy:
-	SHLL $0x02, R9
-	ORL  $0x01, R9
-	MOVW R9, (AX)
-	ADDQ $0x02, AX
+	SHLL $0x02, R10
+	ORL  $0x01, R10
+	MOVW R10, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
 
 repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy:
-	XORQ SI, SI
-	LEAL 1(SI)(R9*4), R9
-	MOVB BL, 1(AX)
-	SARL $0x08, BX
-	SHLL $0x05, BX
-	ORL  BX, R9
-	MOVB R9, (AX)
-	ADDQ $0x02, AX
+	XORQ DI, DI
+	LEAL 1(DI)(R10*4), R10
+	MOVB SI, 1(CX)
+	SARL $0x08, SI
+	SHLL $0x05, SI
+	ORL  SI, R10
+	MOVB R10, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
 
 four_bytes_remain_match_nolit_encodeBlockAsm4MB:
-	TESTL R9, R9
+	TESTL R10, R10
 	JZ    match_nolit_emitcopy_end_encodeBlockAsm4MB
-	XORL  SI, SI
-	LEAL  -1(SI)(R9*4), R9
-	MOVB  R9, (AX)
-	MOVL  BX, 1(AX)
-	ADDQ  $0x05, AX
+	XORL  DI, DI
+	LEAL  -1(DI)(R10*4), R10
+	MOVB  R10, (CX)
+	MOVL  SI, 1(CX)
+	ADDQ  $0x05, CX
 	JMP   match_nolit_emitcopy_end_encodeBlockAsm4MB
 
 two_byte_offset_match_nolit_encodeBlockAsm4MB:
-	CMPL R9, $0x40
+	CMPL R10, $0x40
 	JBE  two_byte_offset_short_match_nolit_encodeBlockAsm4MB
-	CMPL BX, $0x00000800
+	CMPL SI, $0x00000800
 	JAE  long_offset_short_match_nolit_encodeBlockAsm4MB
-	MOVL $0x00000001, SI
-	LEAL 16(SI), SI
-	MOVB BL, 1(AX)
-	SHRL $0x08, BX
-	SHLL $0x05, BX
-	ORL  BX, SI
-	MOVB SI, (AX)
-	ADDQ $0x02, AX
-	SUBL $0x08, R9
-
+	MOVL $0x00000001, DI
+	LEAL 16(DI), DI
+	MOVB SI, 1(CX)
+	SHRL $0x08, SI
+	SHLL $0x05, SI
+	ORL  SI, DI
+	MOVB DI, (CX)
+	ADDQ $0x02, CX
+	SUBL $0x08, R10
+
 	// emitRepeat
-	LEAL -4(R9), R9
+	LEAL -4(R10), R10
 	JMP  cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b
-	MOVL R9, SI
-	LEAL -4(R9), R9
-	CMPL SI, $0x08
+	MOVL R10, DI
+	LEAL -4(R10), R10
+	CMPL DI, $0x08
 	JBE  repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b
-	CMPL SI, $0x0c
+	CMPL DI, $0x0c
 	JAE  cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b
-	CMPL BX, $0x00000800
+	CMPL SI, $0x00000800
 	JB   repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b
 
 cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b:
-	CMPL R9, $0x00000104
+	CMPL R10, $0x00000104
 	JB   repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b
-	CMPL R9, $0x00010100
+	CMPL R10, $0x00010100
 	JB   repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b
-	LEAL -65536(R9), R9
-	MOVL R9, BX
-	MOVW $0x001d, (AX)
-	MOVW R9, 2(AX)
-	SARL $0x10, BX
-	MOVB BL, 4(AX)
-	ADDQ $0x05, AX
+	LEAL -65536(R10), R10
+	MOVL R10, SI
+	MOVW $0x001d, (CX)
+	MOVW R10, 2(CX)
+	SARL $0x10, SI
+	MOVB SI, 4(CX)
+	ADDQ $0x05, CX
 	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
 
 repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b:
-	LEAL -256(R9), R9
-	MOVW $0x0019, (AX)
-	MOVW R9, 2(AX)
-	ADDQ $0x04, AX
+	LEAL -256(R10), R10
+	MOVW $0x0019, (CX)
+	MOVW R10, 2(CX)
+	ADDQ $0x04, CX
 	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
 
 repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b:
-	LEAL -4(R9), R9
-	MOVW $0x0015, (AX)
-	MOVB R9, 2(AX)
-	ADDQ $0x03, AX
+	LEAL -4(R10), R10
+	MOVW $0x0015, (CX)
+	MOVB R10, 2(CX)
+	ADDQ $0x03, CX
 	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
 
 repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b:
-	SHLL $0x02, R9
-	ORL  $0x01, R9
-	MOVW R9, (AX)
-	ADDQ $0x02, AX
+	SHLL $0x02, R10
+	ORL  $0x01, R10
+	MOVW R10, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
 
 repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short_2b:
-	XORQ SI, SI
-	LEAL 1(SI)(R9*4), R9
-	MOVB BL, 1(AX)
-	SARL $0x08, BX
-	SHLL $0x05, BX
-	ORL  BX, R9
-	MOVB R9, (AX)
-	ADDQ $0x02, AX
+	XORQ DI, DI
+	LEAL 1(DI)(R10*4), R10
+	MOVB SI, 1(CX)
+	SARL $0x08, SI
+	SHLL $0x05, SI
+	ORL  SI, R10
+	MOVB R10, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
 
 long_offset_short_match_nolit_encodeBlockAsm4MB:
-	MOVB $0xee, (AX)
-	MOVW BX, 1(AX)
-	LEAL -60(R9), R9
-	ADDQ $0x03, AX
+	MOVB $0xee, (CX)
+	MOVW SI, 1(CX)
+	LEAL -60(R10), R10
+	ADDQ $0x03, CX
 
 	// emitRepeat
-	MOVL R9, SI
-	LEAL -4(R9), R9
-	CMPL SI, $0x08
+	MOVL R10, DI
+	LEAL -4(R10), R10
+	CMPL DI, $0x08
 	JBE  repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short
-	CMPL SI, $0x0c
+	CMPL DI, $0x0c
 	JAE  cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short
-	CMPL BX, $0x00000800
+	CMPL SI, $0x00000800
 	JB   repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short
 
 cant_repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short:
-	CMPL R9, $0x00000104
+	CMPL R10, $0x00000104
 	JB   repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short
-	CMPL R9, $0x00010100
+	CMPL R10, $0x00010100
 	JB   repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short
-	LEAL -65536(R9), R9
-	MOVL R9, BX
-	MOVW $0x001d, (AX)
-	MOVW R9, 2(AX)
-	SARL $0x10, BX
-	MOVB BL, 4(AX)
-	ADDQ $0x05, AX
+	LEAL -65536(R10), R10
+	MOVL R10, SI
+	MOVW $0x001d, (CX)
+	MOVW R10, 2(CX)
+	SARL $0x10, SI
+	MOVB SI, 4(CX)
+	ADDQ $0x05, CX
 	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
 
 repeat_four_match_nolit_encodeBlockAsm4MB_emit_copy_short:
-	LEAL -256(R9), R9
-	MOVW $0x0019, (AX)
-	MOVW R9, 2(AX)
-	ADDQ $0x04, AX
+	LEAL -256(R10), R10
+	MOVW $0x0019, (CX)
+	MOVW R10, 2(CX)
+	ADDQ $0x04, CX
 	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
 
 repeat_three_match_nolit_encodeBlockAsm4MB_emit_copy_short:
-	LEAL -4(R9), R9
-	MOVW $0x0015, (AX)
-	MOVB R9, 2(AX)
-	ADDQ $0x03, AX
+	LEAL -4(R10), R10
+	MOVW $0x0015, (CX)
+	MOVB R10, 2(CX)
+	ADDQ $0x03, CX
 	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
 
 repeat_two_match_nolit_encodeBlockAsm4MB_emit_copy_short:
-	SHLL $0x02, R9
-	ORL  $0x01, R9
-	MOVW R9, (AX)
-	ADDQ $0x02, AX
+	SHLL $0x02, R10
+	ORL  $0x01, R10
+	MOVW R10, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
 
 repeat_two_offset_match_nolit_encodeBlockAsm4MB_emit_copy_short:
-	XORQ SI, SI
-	LEAL 1(SI)(R9*4), R9
-	MOVB BL, 1(AX)
-	SARL $0x08, BX
-	SHLL $0x05, BX
-	ORL  BX, R9
-	MOVB R9, (AX)
-	ADDQ $0x02, AX
+	XORQ DI, DI
+	LEAL 1(DI)(R10*4), R10
+	MOVB SI, 1(CX)
+	SARL $0x08, SI
+	SHLL $0x05, SI
+	ORL  SI, R10
+	MOVB R10, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
 
 two_byte_offset_short_match_nolit_encodeBlockAsm4MB:
-	MOVL R9, SI
-	SHLL $0x02, SI
-	CMPL R9, $0x0c
+	MOVL R10, DI
+	SHLL $0x02, DI
+	CMPL R10, $0x0c
 	JAE  emit_copy_three_match_nolit_encodeBlockAsm4MB
-	CMPL BX, $0x00000800
+	CMPL SI, $0x00000800
 	JAE  emit_copy_three_match_nolit_encodeBlockAsm4MB
-	LEAL -15(SI), SI
-	MOVB BL, 1(AX)
-	SHRL $0x08, BX
-	SHLL $0x05, BX
-	ORL  BX, SI
-	MOVB SI, (AX)
-	ADDQ $0x02, AX
+	LEAL -15(DI), DI
+	MOVB SI, 1(CX)
+	SHRL $0x08, SI
+	SHLL $0x05, SI
+	ORL  SI, DI
+	MOVB DI, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeBlockAsm4MB
 
 emit_copy_three_match_nolit_encodeBlockAsm4MB:
-	LEAL -2(SI), SI
-	MOVB SI, (AX)
-	MOVW BX, 1(AX)
-	ADDQ $0x03, AX
+	LEAL -2(DI), DI
+	MOVB DI, (CX)
+	MOVW SI, 1(CX)
+	ADDQ $0x03, CX
 
 match_nolit_emitcopy_end_encodeBlockAsm4MB:
-	CMPL CX, 8(SP)
+	CMPL DX, 8(SP)
 	JAE  emit_remainder_encodeBlockAsm4MB
-	MOVQ -2(DX)(CX*1), SI
-	CMPQ AX, (SP)
+	MOVQ -2(BX)(DX*1), DI
+	CMPQ CX, (SP)
 	JB   match_nolit_dst_ok_encodeBlockAsm4MB
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 match_nolit_dst_ok_encodeBlockAsm4MB:
-	MOVQ  $0x0000cf1bbcdcbf9b, R8
-	MOVQ  SI, DI
-	SHRQ  $0x10, SI
-	MOVQ  SI, BX
-	SHLQ  $0x10, DI
-	IMULQ R8, DI
-	SHRQ  $0x32, DI
-	SHLQ  $0x10, BX
-	IMULQ R8, BX
-	SHRQ  $0x32, BX
-	LEAL  -2(CX), R8
-	LEAQ  24(SP)(BX*4), R9
-	MOVL  (R9), BX
-	MOVL  R8, 24(SP)(DI*4)
-	MOVL  CX, (R9)
-	CMPL  (DX)(BX*1), SI
+	MOVQ  $0x0000cf1bbcdcbf9b, R9
+	MOVQ  DI, R8
+	SHRQ  $0x10, DI
+	MOVQ  DI, SI
+	SHLQ  $0x10, R8
+	IMULQ R9, R8
+	SHRQ  $0x32, R8
+	SHLQ  $0x10, SI
+	IMULQ R9, SI
+	SHRQ  $0x32, SI
+	LEAL  -2(DX), R9
+	LEAQ  (AX)(SI*4), R10
+	MOVL  (R10), SI
+	MOVL  R9, (AX)(R8*4)
+	MOVL  DX, (R10)
+	CMPL  (BX)(SI*1), DI
 	JEQ   match_nolit_loop_encodeBlockAsm4MB
-	INCL  CX
+	INCL  DX
 	JMP   search_loop_encodeBlockAsm4MB
 
 emit_remainder_encodeBlockAsm4MB:
-	MOVQ src_len+32(FP), CX
-	SUBL 12(SP), CX
-	LEAQ 4(AX)(CX*1), CX
-	CMPQ CX, (SP)
+	MOVQ src_len+32(FP), AX
+	SUBL 12(SP), AX
+	LEAQ 4(CX)(AX*1), AX
+	CMPQ AX, (SP)
 	JB   emit_remainder_ok_encodeBlockAsm4MB
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 emit_remainder_ok_encodeBlockAsm4MB:
-	MOVQ src_len+32(FP), CX
-	MOVL 12(SP), BX
-	CMPL BX, CX
+	MOVQ src_len+32(FP), AX
+	MOVL 12(SP), DX
+	CMPL DX, AX
 	JEQ  emit_literal_done_emit_remainder_encodeBlockAsm4MB
-	MOVL CX, SI
-	MOVL CX, 12(SP)
-	LEAQ (DX)(BX*1), CX
-	SUBL BX, SI
+	MOVL AX, SI
+	MOVL AX, 12(SP)
+	LEAQ (BX)(DX*1), AX
+	SUBL DX, SI
 	LEAL -1(SI), DX
 	CMPL DX, $0x3c
 	JB   one_byte_emit_remainder_encodeBlockAsm4MB
@@ -2618,33 +2620,33 @@ emit_remainder_ok_encodeBlockAsm4MB:
 	JB   three_bytes_emit_remainder_encodeBlockAsm4MB
 	MOVL DX, BX
 	SHRL $0x10, BX
-	MOVB $0xf8, (AX)
-	MOVW DX, 1(AX)
-	MOVB BL, 3(AX)
-	ADDQ $0x04, AX
+	MOVB $0xf8, (CX)
+	MOVW DX, 1(CX)
+	MOVB BL, 3(CX)
+	ADDQ $0x04, CX
 	JMP  memmove_long_emit_remainder_encodeBlockAsm4MB
 
 three_bytes_emit_remainder_encodeBlockAsm4MB:
-	MOVB $0xf4, (AX)
-	MOVW DX, 1(AX)
-	ADDQ $0x03, AX
+	MOVB $0xf4, (CX)
+	MOVW DX, 1(CX)
+	ADDQ $0x03, CX
 	JMP  memmove_long_emit_remainder_encodeBlockAsm4MB
 
 two_bytes_emit_remainder_encodeBlockAsm4MB:
-	MOVB $0xf0, (AX)
-	MOVB DL, 1(AX)
-	ADDQ $0x02, AX
+	MOVB $0xf0, (CX)
+	MOVB DL, 1(CX)
+	ADDQ $0x02, CX
 	CMPL DX, $0x40
 	JB   memmove_emit_remainder_encodeBlockAsm4MB
 	JMP  memmove_long_emit_remainder_encodeBlockAsm4MB
 
 one_byte_emit_remainder_encodeBlockAsm4MB:
 	SHLB $0x02, DL
-	MOVB DL, (AX)
-	ADDQ $0x01, AX
+	MOVB DL, (CX)
+	ADDQ $0x01, CX
 
 memmove_emit_remainder_encodeBlockAsm4MB:
-	LEAQ (AX)(SI*1), DX
+	LEAQ (CX)(SI*1), DX
 	MOVL SI, BX
 
 	// genMemMoveShort
@@ -2660,73 +2662,73 @@ memmove_emit_remainder_encodeBlockAsm4MB:
 	JMP  emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_33through64
 
 emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_1or2:
-	MOVB (CX), SI
-	MOVB -1(CX)(BX*1), CL
-	MOVB SI, (AX)
-	MOVB CL, -1(AX)(BX*1)
+	MOVB (AX), SI
+	MOVB -1(AX)(BX*1), AL
+	MOVB SI, (CX)
+	MOVB AL, -1(CX)(BX*1)
 	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm4MB
 
 emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_3:
-	MOVW (CX), SI
-	MOVB 2(CX), CL
-	MOVW SI, (AX)
-	MOVB CL, 2(AX)
+	MOVW (AX), SI
+	MOVB 2(AX), AL
+	MOVW SI, (CX)
+	MOVB AL, 2(CX)
 	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm4MB
 
 emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_4through7:
-	MOVL (CX), SI
-	MOVL -4(CX)(BX*1), CX
-	MOVL SI, (AX)
-	MOVL CX, -4(AX)(BX*1)
+	MOVL (AX), SI
+	MOVL -4(AX)(BX*1), AX
+	MOVL SI, (CX)
+	MOVL AX, -4(CX)(BX*1)
 	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm4MB
 
 emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_8through16:
-	MOVQ (CX), SI
-	MOVQ -8(CX)(BX*1), CX
-	MOVQ SI, (AX)
-	MOVQ CX, -8(AX)(BX*1)
+	MOVQ (AX), SI
+	MOVQ -8(AX)(BX*1), AX
+	MOVQ SI, (CX)
+	MOVQ AX, -8(CX)(BX*1)
 	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm4MB
 
 emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_17through32:
-	MOVOU (CX), X0
-	MOVOU -16(CX)(BX*1), X1
-	MOVOU X0, (AX)
-	MOVOU X1, -16(AX)(BX*1)
+	MOVOU (AX), X0
+	MOVOU -16(AX)(BX*1), X1
+	MOVOU X0, (CX)
+	MOVOU X1, -16(CX)(BX*1)
 	JMP   memmove_end_copy_emit_remainder_encodeBlockAsm4MB
 
 emit_lit_memmove_emit_remainder_encodeBlockAsm4MB_memmove_move_33through64:
-	MOVOU (CX), X0
-	MOVOU 16(CX), X1
-	MOVOU -32(CX)(BX*1), X2
-	MOVOU -16(CX)(BX*1), X3
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(BX*1)
-	MOVOU X3, -16(AX)(BX*1)
+	MOVOU (AX), X0
+	MOVOU 16(AX), X1
+	MOVOU -32(AX)(BX*1), X2
+	MOVOU -16(AX)(BX*1), X3
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(BX*1)
+	MOVOU X3, -16(CX)(BX*1)
 
 memmove_end_copy_emit_remainder_encodeBlockAsm4MB:
-	MOVQ DX, AX
+	MOVQ DX, CX
 	JMP  emit_literal_done_emit_remainder_encodeBlockAsm4MB
 
 memmove_long_emit_remainder_encodeBlockAsm4MB:
-	LEAQ (AX)(SI*1), DX
+	LEAQ (CX)(SI*1), DX
 	MOVL SI, BX
 
 	// genMemMoveLong
-	MOVOU (CX), X0
-	MOVOU 16(CX), X1
-	MOVOU -32(CX)(BX*1), X2
-	MOVOU -16(CX)(BX*1), X3
+	MOVOU (AX), X0
+	MOVOU 16(AX), X1
+	MOVOU -32(AX)(BX*1), X2
+	MOVOU -16(AX)(BX*1), X3
 	MOVQ  BX, DI
 	SHRQ  $0x05, DI
-	MOVQ  AX, SI
+	MOVQ  CX, SI
 	ANDL  $0x0000001f, SI
 	MOVQ  $0x00000040, R8
 	SUBQ  SI, R8
 	DECQ  DI
 	JA    emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_forward_sse_loop_32
-	LEAQ  -32(CX)(R8*1), SI
-	LEAQ  -32(AX)(R8*1), R9
+	LEAQ  -32(AX)(R8*1), SI
+	LEAQ  -32(CX)(R8*1), R9
 
 emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_big_loop_back:
 	MOVOU (SI), X4
@@ -2740,967 +2742,968 @@ emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_big_loop_back:
 	JNA   emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_big_loop_back
 
 emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_forward_sse_loop_32:
-	MOVOU -32(CX)(R8*1), X4
-	MOVOU -16(CX)(R8*1), X5
-	MOVOA X4, -32(AX)(R8*1)
-	MOVOA X5, -16(AX)(R8*1)
+	MOVOU -32(AX)(R8*1), X4
+	MOVOU -16(AX)(R8*1), X5
+	MOVOA X4, -32(CX)(R8*1)
+	MOVOA X5, -16(CX)(R8*1)
 	ADDQ  $0x20, R8
 	CMPQ  BX, R8
 	JAE   emit_lit_memmove_long_emit_remainder_encodeBlockAsm4MBlarge_forward_sse_loop_32
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(BX*1)
-	MOVOU X3, -16(AX)(BX*1)
-	MOVQ  DX, AX
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(BX*1)
+	MOVOU X3, -16(CX)(BX*1)
+	MOVQ  DX, CX
 
 emit_literal_done_emit_remainder_encodeBlockAsm4MB:
-	MOVQ dst_base+0(FP), CX
-	SUBQ CX, AX
-	MOVQ AX, ret+48(FP)
+	MOVQ dst_base+0(FP), AX
+	SUBQ AX, CX
+	MOVQ CX, ret+56(FP)
 	RET
 
-// func encodeBlockAsm12B(dst []byte, src []byte) int
+// func encodeBlockAsm12B(dst []byte, src []byte, tmp *[16384]byte) int
 // Requires: BMI, SSE2
-TEXT ·encodeBlockAsm12B(SB), $16408-56
-	MOVQ dst_base+0(FP), AX
-	MOVQ $0x00000080, CX
-	LEAQ 24(SP), DX
+TEXT ·encodeBlockAsm12B(SB), $24-64
+	MOVQ tmp+48(FP), AX
+	MOVQ dst_base+0(FP), CX
+	MOVQ $0x00000080, DX
+	MOVQ AX, BX
 	PXOR X0, X0
 
 zero_loop_encodeBlockAsm12B:
-	MOVOU X0, (DX)
-	MOVOU X0, 16(DX)
-	MOVOU X0, 32(DX)
-	MOVOU X0, 48(DX)
-	MOVOU X0, 64(DX)
-	MOVOU X0, 80(DX)
-	MOVOU X0, 96(DX)
-	MOVOU X0, 112(DX)
-	ADDQ  $0x80, DX
-	DECQ  CX
+	MOVOU X0, (BX)
+	MOVOU X0, 16(BX)
+	MOVOU X0, 32(BX)
+	MOVOU X0, 48(BX)
+	MOVOU X0, 64(BX)
+	MOVOU X0, 80(BX)
+	MOVOU X0, 96(BX)
+	MOVOU X0, 112(BX)
+	ADDQ  $0x80, BX
+	DECQ  DX
 	JNZ   zero_loop_encodeBlockAsm12B
 	MOVL  $0x00000000, 12(SP)
-	MOVQ  src_len+32(FP), CX
-	LEAQ  -9(CX), DX
-	LEAQ  -8(CX), BX
-	MOVL  BX, 8(SP)
-	SHRQ  $0x05, CX
-	SUBL  CX, DX
-	LEAQ  (AX)(DX*1), DX
-	MOVQ  DX, (SP)
-	MOVL  $0x00000001, CX
-	MOVL  CX, 16(SP)
-	MOVQ  src_base+24(FP), DX
+	MOVQ  src_len+32(FP), DX
+	LEAQ  -9(DX), BX
+	LEAQ  -8(DX), SI
+	MOVL  SI, 8(SP)
+	SHRQ  $0x05, DX
+	SUBL  DX, BX
+	LEAQ  (CX)(BX*1), BX
+	MOVQ  BX, (SP)
+	MOVL  $0x00000001, DX
+	MOVL  DX, 16(SP)
+	MOVQ  src_base+24(FP), BX
 
 search_loop_encodeBlockAsm12B:
-	MOVL  CX, BX
-	SUBL  12(SP), BX
-	SHRL  $0x05, BX
-	LEAL  4(CX)(BX*1), BX
-	CMPL  BX, 8(SP)
+	MOVL  DX, SI
+	SUBL  12(SP), SI
+	SHRL  $0x05, SI
+	LEAL  4(DX)(SI*1), SI
+	CMPL  SI, 8(SP)
 	JAE   emit_remainder_encodeBlockAsm12B
-	MOVQ  (DX)(CX*1), SI
-	MOVL  BX, 20(SP)
-	MOVQ  $0x000000cf1bbcdcbb, R8
-	MOVQ  SI, R9
-	MOVQ  SI, R10
-	SHRQ  $0x08, R10
-	SHLQ  $0x18, R9
-	IMULQ R8, R9
-	SHRQ  $0x34, R9
+	MOVQ  (BX)(DX*1), DI
+	MOVL  SI, 20(SP)
+	MOVQ  $0x000000cf1bbcdcbb, R9
+	MOVQ  DI, R10
+	MOVQ  DI, R11
+	SHRQ  $0x08, R11
 	SHLQ  $0x18, R10
-	IMULQ R8, R10
+	IMULQ R9, R10
 	SHRQ  $0x34, R10
-	MOVL  24(SP)(R9*4), BX
-	MOVL  24(SP)(R10*4), DI
-	MOVL  CX, 24(SP)(R9*4)
-	LEAL  1(CX), R9
-	MOVL  R9, 24(SP)(R10*4)
-	MOVQ  SI, R9
-	SHRQ  $0x10, R9
-	SHLQ  $0x18, R9
-	IMULQ R8, R9
-	SHRQ  $0x34, R9
-	MOVL  CX, R8
-	SUBL  16(SP), R8
-	MOVL  1(DX)(R8*1), R10
-	MOVQ  SI, R8
-	SHRQ  $0x08, R8
-	CMPL  R8, R10
+	SHLQ  $0x18, R11
+	IMULQ R9, R11
+	SHRQ  $0x34, R11
+	MOVL  (AX)(R10*4), SI
+	MOVL  (AX)(R11*4), R8
+	MOVL  DX, (AX)(R10*4)
+	LEAL  1(DX), R10
+	MOVL  R10, (AX)(R11*4)
+	MOVQ  DI, R10
+	SHRQ  $0x10, R10
+	SHLQ  $0x18, R10
+	IMULQ R9, R10
+	SHRQ  $0x34, R10
+	MOVL  DX, R9
+	SUBL  16(SP), R9
+	MOVL  1(BX)(R9*1), R11
+	MOVQ  DI, R9
+	SHRQ  $0x08, R9
+	CMPL  R9, R11
 	JNE   no_repeat_found_encodeBlockAsm12B
-	LEAL  1(CX), SI
-	MOVL  12(SP), DI
-	MOVL  SI, BX
-	SUBL  16(SP), BX
+	LEAL  1(DX), DI
+	MOVL  12(SP), R8
+	MOVL  DI, SI
+	SUBL  16(SP), SI
 	JZ    repeat_extend_back_end_encodeBlockAsm12B
 
 repeat_extend_back_loop_encodeBlockAsm12B:
-	CMPL SI, DI
+	CMPL DI, R8
 	JBE  repeat_extend_back_end_encodeBlockAsm12B
-	MOVB -1(DX)(BX*1), R8
-	MOVB -1(DX)(SI*1), R9
-	CMPB R8, R9
+	MOVB -1(BX)(SI*1), R9
+	MOVB -1(BX)(DI*1), R10
+	CMPB R9, R10
 	JNE  repeat_extend_back_end_encodeBlockAsm12B
-	LEAL -1(SI), SI
-	DECL BX
+	LEAL -1(DI), DI
+	DECL SI
 	JNZ  repeat_extend_back_loop_encodeBlockAsm12B
 
 repeat_extend_back_end_encodeBlockAsm12B:
-	MOVL SI, BX
-	SUBL 12(SP), BX
-	LEAQ 3(AX)(BX*1), BX
-	CMPQ BX, (SP)
+	MOVL DI, SI
+	SUBL 12(SP), SI
+	LEAQ 3(CX)(SI*1), SI
+	CMPQ SI, (SP)
 	JB   repeat_dst_size_check_encodeBlockAsm12B
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 repeat_dst_size_check_encodeBlockAsm12B:
-	MOVL 12(SP), BX
-	CMPL BX, SI
+	MOVL 12(SP), SI
+	CMPL SI, DI
 	JEQ  emit_literal_done_repeat_emit_encodeBlockAsm12B
-	MOVL SI, R8
-	MOVL SI, 12(SP)
-	LEAQ (DX)(BX*1), R9
-	SUBL BX, R8
-	LEAL -1(R8), BX
-	CMPL BX, $0x3c
+	MOVL DI, R9
+	MOVL DI, 12(SP)
+	LEAQ (BX)(SI*1), R10
+	SUBL SI, R9
+	LEAL -1(R9), SI
+	CMPL SI, $0x3c
 	JB   one_byte_repeat_emit_encodeBlockAsm12B
-	CMPL BX, $0x00000100
+	CMPL SI, $0x00000100
 	JB   two_bytes_repeat_emit_encodeBlockAsm12B
 	JB   three_bytes_repeat_emit_encodeBlockAsm12B
 
 three_bytes_repeat_emit_encodeBlockAsm12B:
-	MOVB $0xf4, (AX)
-	MOVW BX, 1(AX)
-	ADDQ $0x03, AX
+	MOVB $0xf4, (CX)
+	MOVW SI, 1(CX)
+	ADDQ $0x03, CX
 	JMP  memmove_long_repeat_emit_encodeBlockAsm12B
 
 two_bytes_repeat_emit_encodeBlockAsm12B:
-	MOVB $0xf0, (AX)
-	MOVB BL, 1(AX)
-	ADDQ $0x02, AX
-	CMPL BX, $0x40
+	MOVB $0xf0, (CX)
+	MOVB SI, 1(CX)
+	ADDQ $0x02, CX
+	CMPL SI, $0x40
 	JB   memmove_repeat_emit_encodeBlockAsm12B
 	JMP  memmove_long_repeat_emit_encodeBlockAsm12B
 
 one_byte_repeat_emit_encodeBlockAsm12B:
-	SHLB $0x02, BL
-	MOVB BL, (AX)
-	ADDQ $0x01, AX
+	SHLB $0x02, SI
+	MOVB SI, (CX)
+	ADDQ $0x01, CX
 
 memmove_repeat_emit_encodeBlockAsm12B:
-	LEAQ (AX)(R8*1), BX
+	LEAQ (CX)(R9*1), SI
 
 	// genMemMoveShort
-	CMPQ R8, $0x08
+	CMPQ R9, $0x08
 	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8
-	CMPQ R8, $0x10
+	CMPQ R9, $0x10
 	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8through16
-	CMPQ R8, $0x20
+	CMPQ R9, $0x20
 	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_17through32
 	JMP  emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_33through64
 
 emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8:
-	MOVQ (R9), R10
-	MOVQ R10, (AX)
+	MOVQ (R10), R11
+	MOVQ R11, (CX)
 	JMP  memmove_end_copy_repeat_emit_encodeBlockAsm12B
 
 emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_8through16:
-	MOVQ (R9), R10
-	MOVQ -8(R9)(R8*1), R9
-	MOVQ R10, (AX)
-	MOVQ R9, -8(AX)(R8*1)
+	MOVQ (R10), R11
+	MOVQ -8(R10)(R9*1), R10
+	MOVQ R11, (CX)
+	MOVQ R10, -8(CX)(R9*1)
 	JMP  memmove_end_copy_repeat_emit_encodeBlockAsm12B
 
 emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_17through32:
-	MOVOU (R9), X0
-	MOVOU -16(R9)(R8*1), X1
-	MOVOU X0, (AX)
-	MOVOU X1, -16(AX)(R8*1)
+	MOVOU (R10), X0
+	MOVOU -16(R10)(R9*1), X1
+	MOVOU X0, (CX)
+	MOVOU X1, -16(CX)(R9*1)
 	JMP   memmove_end_copy_repeat_emit_encodeBlockAsm12B
 
 emit_lit_memmove_repeat_emit_encodeBlockAsm12B_memmove_move_33through64:
-	MOVOU (R9), X0
-	MOVOU 16(R9), X1
-	MOVOU -32(R9)(R8*1), X2
-	MOVOU -16(R9)(R8*1), X3
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(R8*1)
-	MOVOU X3, -16(AX)(R8*1)
+	MOVOU (R10), X0
+	MOVOU 16(R10), X1
+	MOVOU -32(R10)(R9*1), X2
+	MOVOU -16(R10)(R9*1), X3
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R9*1)
+	MOVOU X3, -16(CX)(R9*1)
 
 memmove_end_copy_repeat_emit_encodeBlockAsm12B:
-	MOVQ BX, AX
+	MOVQ SI, CX
 	JMP  emit_literal_done_repeat_emit_encodeBlockAsm12B
 
 memmove_long_repeat_emit_encodeBlockAsm12B:
-	LEAQ (AX)(R8*1), BX
+	LEAQ (CX)(R9*1), SI
 
 	// genMemMoveLong
-	MOVOU (R9), X0
-	MOVOU 16(R9), X1
-	MOVOU -32(R9)(R8*1), X2
-	MOVOU -16(R9)(R8*1), X3
-	MOVQ  R8, R11
-	SHRQ  $0x05, R11
-	MOVQ  AX, R10
-	ANDL  $0x0000001f, R10
-	MOVQ  $0x00000040, R12
-	SUBQ  R10, R12
-	DECQ  R11
+	MOVOU (R10), X0
+	MOVOU 16(R10), X1
+	MOVOU -32(R10)(R9*1), X2
+	MOVOU -16(R10)(R9*1), X3
+	MOVQ  R9, R12
+	SHRQ  $0x05, R12
+	MOVQ  CX, R11
+	ANDL  $0x0000001f, R11
+	MOVQ  $0x00000040, R13
+	SUBQ  R11, R13
+	DECQ  R12
 	JA    emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32
-	LEAQ  -32(R9)(R12*1), R10
-	LEAQ  -32(AX)(R12*1), R13
+	LEAQ  -32(R10)(R13*1), R11
+	LEAQ  -32(CX)(R13*1), R14
 
 emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_big_loop_back:
-	MOVOU (R10), X4
-	MOVOU 16(R10), X5
-	MOVOA X4, (R13)
-	MOVOA X5, 16(R13)
+	MOVOU (R11), X4
+	MOVOU 16(R11), X5
+	MOVOA X4, (R14)
+	MOVOA X5, 16(R14)
+	ADDQ  $0x20, R14
+	ADDQ  $0x20, R11
 	ADDQ  $0x20, R13
-	ADDQ  $0x20, R10
-	ADDQ  $0x20, R12
-	DECQ  R11
+	DECQ  R12
 	JNA   emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_big_loop_back
 
 emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32:
-	MOVOU -32(R9)(R12*1), X4
-	MOVOU -16(R9)(R12*1), X5
-	MOVOA X4, -32(AX)(R12*1)
-	MOVOA X5, -16(AX)(R12*1)
-	ADDQ  $0x20, R12
-	CMPQ  R8, R12
+	MOVOU -32(R10)(R13*1), X4
+	MOVOU -16(R10)(R13*1), X5
+	MOVOA X4, -32(CX)(R13*1)
+	MOVOA X5, -16(CX)(R13*1)
+	ADDQ  $0x20, R13
+	CMPQ  R9, R13
 	JAE   emit_lit_memmove_long_repeat_emit_encodeBlockAsm12Blarge_forward_sse_loop_32
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(R8*1)
-	MOVOU X3, -16(AX)(R8*1)
-	MOVQ  BX, AX
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R9*1)
+	MOVOU X3, -16(CX)(R9*1)
+	MOVQ  SI, CX
 
 emit_literal_done_repeat_emit_encodeBlockAsm12B:
-	ADDL $0x05, CX
-	MOVL CX, BX
-	SUBL 16(SP), BX
-	MOVQ src_len+32(FP), R8
-	SUBL CX, R8
-	LEAQ (DX)(CX*1), R9
-	LEAQ (DX)(BX*1), BX
+	ADDL $0x05, DX
+	MOVL DX, SI
+	SUBL 16(SP), SI
+	MOVQ src_len+32(FP), R9
+	SUBL DX, R9
+	LEAQ (BX)(DX*1), R10
+	LEAQ (BX)(SI*1), SI
 
 	// matchLen
-	XORL R11, R11
+	XORL R12, R12
 
 matchlen_loopback_16_repeat_extend_encodeBlockAsm12B:
-	CMPL R8, $0x10
+	CMPL R9, $0x10
 	JB   matchlen_match8_repeat_extend_encodeBlockAsm12B
-	MOVQ (R9)(R11*1), R10
-	MOVQ 8(R9)(R11*1), R12
-	XORQ (BX)(R11*1), R10
+	MOVQ (R10)(R12*1), R11
+	MOVQ 8(R10)(R12*1), R13
+	XORQ (SI)(R12*1), R11
 	JNZ  matchlen_bsf_8_repeat_extend_encodeBlockAsm12B
-	XORQ 8(BX)(R11*1), R12
+	XORQ 8(SI)(R12*1), R13
 	JNZ  matchlen_bsf_16repeat_extend_encodeBlockAsm12B
-	LEAL -16(R8), R8
-	LEAL 16(R11), R11
+	LEAL -16(R9), R9
+	LEAL 16(R12), R12
 	JMP  matchlen_loopback_16_repeat_extend_encodeBlockAsm12B
 
 matchlen_bsf_16repeat_extend_encodeBlockAsm12B:
 #ifdef GOAMD64_v3
-	TZCNTQ R12, R12
+	TZCNTQ R13, R13
 
 #else
-	BSFQ R12, R12
+	BSFQ R13, R13
 
 #endif
-	SARQ $0x03, R12
-	LEAL 8(R11)(R12*1), R11
+	SARQ $0x03, R13
+	LEAL 8(R12)(R13*1), R12
 	JMP  repeat_extend_forward_end_encodeBlockAsm12B
 
 matchlen_match8_repeat_extend_encodeBlockAsm12B:
-	CMPL R8, $0x08
+	CMPL R9, $0x08
 	JB   matchlen_match4_repeat_extend_encodeBlockAsm12B
-	MOVQ (R9)(R11*1), R10
-	XORQ (BX)(R11*1), R10
+	MOVQ (R10)(R12*1), R11
+	XORQ (SI)(R12*1), R11
 	JNZ  matchlen_bsf_8_repeat_extend_encodeBlockAsm12B
-	LEAL -8(R8), R8
-	LEAL 8(R11), R11
+	LEAL -8(R9), R9
+	LEAL 8(R12), R12
 	JMP  matchlen_match4_repeat_extend_encodeBlockAsm12B
 
 matchlen_bsf_8_repeat_extend_encodeBlockAsm12B:
 #ifdef GOAMD64_v3
-	TZCNTQ R10, R10
+	TZCNTQ R11, R11
 
 #else
-	BSFQ R10, R10
+	BSFQ R11, R11
 
 #endif
-	SARQ $0x03, R10
-	LEAL (R11)(R10*1), R11
+	SARQ $0x03, R11
+	LEAL (R12)(R11*1), R12
 	JMP  repeat_extend_forward_end_encodeBlockAsm12B
 
 matchlen_match4_repeat_extend_encodeBlockAsm12B:
-	CMPL R8, $0x04
+	CMPL R9, $0x04
 	JB   matchlen_match2_repeat_extend_encodeBlockAsm12B
-	MOVL (R9)(R11*1), R10
-	CMPL (BX)(R11*1), R10
+	MOVL (R10)(R12*1), R11
+	CMPL (SI)(R12*1), R11
 	JNE  matchlen_match2_repeat_extend_encodeBlockAsm12B
-	LEAL -4(R8), R8
-	LEAL 4(R11), R11
+	LEAL -4(R9), R9
+	LEAL 4(R12), R12
 
 matchlen_match2_repeat_extend_encodeBlockAsm12B:
-	CMPL R8, $0x01
+	CMPL R9, $0x01
 	JE   matchlen_match1_repeat_extend_encodeBlockAsm12B
 	JB   repeat_extend_forward_end_encodeBlockAsm12B
-	MOVW (R9)(R11*1), R10
-	CMPW (BX)(R11*1), R10
+	MOVW (R10)(R12*1), R11
+	CMPW (SI)(R12*1), R11
 	JNE  matchlen_match1_repeat_extend_encodeBlockAsm12B
-	LEAL 2(R11), R11
-	SUBL $0x02, R8
+	LEAL 2(R12), R12
+	SUBL $0x02, R9
 	JZ   repeat_extend_forward_end_encodeBlockAsm12B
 
 matchlen_match1_repeat_extend_encodeBlockAsm12B:
-	MOVB (R9)(R11*1), R10
-	CMPB (BX)(R11*1), R10
+	MOVB (R10)(R12*1), R11
+	CMPB (SI)(R12*1), R11
 	JNE  repeat_extend_forward_end_encodeBlockAsm12B
-	LEAL 1(R11), R11
+	LEAL 1(R12), R12
 
 repeat_extend_forward_end_encodeBlockAsm12B:
-	ADDL  R11, CX
-	MOVL  CX, BX
-	SUBL  SI, BX
-	MOVL  16(SP), SI
-	TESTL DI, DI
+	ADDL  R12, DX
+	MOVL  DX, SI
+	SUBL  DI, SI
+	MOVL  16(SP), DI
+	TESTL R8, R8
 	JZ    repeat_as_copy_encodeBlockAsm12B
 
 	// emitRepeat
-	MOVL BX, DI
-	LEAL -4(BX), BX
-	CMPL DI, $0x08
+	MOVL SI, R8
+	LEAL -4(SI), SI
+	CMPL R8, $0x08
 	JBE  repeat_two_match_repeat_encodeBlockAsm12B
-	CMPL DI, $0x0c
+	CMPL R8, $0x0c
 	JAE  cant_repeat_two_offset_match_repeat_encodeBlockAsm12B
-	CMPL SI, $0x00000800
+	CMPL DI, $0x00000800
 	JB   repeat_two_offset_match_repeat_encodeBlockAsm12B
 
 cant_repeat_two_offset_match_repeat_encodeBlockAsm12B:
-	CMPL BX, $0x00000104
+	CMPL SI, $0x00000104
 	JB   repeat_three_match_repeat_encodeBlockAsm12B
-	LEAL -256(BX), BX
-	MOVW $0x0019, (AX)
-	MOVW BX, 2(AX)
-	ADDQ $0x04, AX
+	LEAL -256(SI), SI
+	MOVW $0x0019, (CX)
+	MOVW SI, 2(CX)
+	ADDQ $0x04, CX
 	JMP  repeat_end_emit_encodeBlockAsm12B
 
 repeat_three_match_repeat_encodeBlockAsm12B:
-	LEAL -4(BX), BX
-	MOVW $0x0015, (AX)
-	MOVB BL, 2(AX)
-	ADDQ $0x03, AX
+	LEAL -4(SI), SI
+	MOVW $0x0015, (CX)
+	MOVB SI, 2(CX)
+	ADDQ $0x03, CX
 	JMP  repeat_end_emit_encodeBlockAsm12B
 
 repeat_two_match_repeat_encodeBlockAsm12B:
-	SHLL $0x02, BX
-	ORL  $0x01, BX
-	MOVW BX, (AX)
-	ADDQ $0x02, AX
+	SHLL $0x02, SI
+	ORL  $0x01, SI
+	MOVW SI, (CX)
+	ADDQ $0x02, CX
 	JMP  repeat_end_emit_encodeBlockAsm12B
 
 repeat_two_offset_match_repeat_encodeBlockAsm12B:
-	XORQ DI, DI
-	LEAL 1(DI)(BX*4), BX
-	MOVB SI, 1(AX)
-	SARL $0x08, SI
-	SHLL $0x05, SI
-	ORL  SI, BX
-	MOVB BL, (AX)
-	ADDQ $0x02, AX
+	XORQ R8, R8
+	LEAL 1(R8)(SI*4), SI
+	MOVB DI, 1(CX)
+	SARL $0x08, DI
+	SHLL $0x05, DI
+	ORL  DI, SI
+	MOVB SI, (CX)
+	ADDQ $0x02, CX
 	JMP  repeat_end_emit_encodeBlockAsm12B
 
 repeat_as_copy_encodeBlockAsm12B:
 	// emitCopy
-	CMPL BX, $0x40
+	CMPL SI, $0x40
 	JBE  two_byte_offset_short_repeat_as_copy_encodeBlockAsm12B
-	CMPL SI, $0x00000800
+	CMPL DI, $0x00000800
 	JAE  long_offset_short_repeat_as_copy_encodeBlockAsm12B
-	MOVL $0x00000001, DI
-	LEAL 16(DI), DI
-	MOVB SI, 1(AX)
-	SHRL $0x08, SI
-	SHLL $0x05, SI
-	ORL  SI, DI
-	MOVB DI, (AX)
-	ADDQ $0x02, AX
-	SUBL $0x08, BX
+	MOVL $0x00000001, R8
+	LEAL 16(R8), R8
+	MOVB DI, 1(CX)
+	SHRL $0x08, DI
+	SHLL $0x05, DI
+	ORL  DI, R8
+	MOVB R8, (CX)
+	ADDQ $0x02, CX
+	SUBL $0x08, SI
 
 	// emitRepeat
-	LEAL -4(BX), BX
+	LEAL -4(SI), SI
 	JMP  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b
-	MOVL BX, DI
-	LEAL -4(BX), BX
-	CMPL DI, $0x08
+	MOVL SI, R8
+	LEAL -4(SI), SI
+	CMPL R8, $0x08
 	JBE  repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b
-	CMPL DI, $0x0c
+	CMPL R8, $0x0c
 	JAE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b
-	CMPL SI, $0x00000800
+	CMPL DI, $0x00000800
 	JB   repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b
 
 cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b:
-	CMPL BX, $0x00000104
+	CMPL SI, $0x00000104
 	JB   repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b
-	LEAL -256(BX), BX
-	MOVW $0x0019, (AX)
-	MOVW BX, 2(AX)
-	ADDQ $0x04, AX
+	LEAL -256(SI), SI
+	MOVW $0x0019, (CX)
+	MOVW SI, 2(CX)
+	ADDQ $0x04, CX
 	JMP  repeat_end_emit_encodeBlockAsm12B
 
 repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b:
-	LEAL -4(BX), BX
-	MOVW $0x0015, (AX)
-	MOVB BL, 2(AX)
-	ADDQ $0x03, AX
+	LEAL -4(SI), SI
+	MOVW $0x0015, (CX)
+	MOVB SI, 2(CX)
+	ADDQ $0x03, CX
 	JMP  repeat_end_emit_encodeBlockAsm12B
 
 repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b:
-	SHLL $0x02, BX
-	ORL  $0x01, BX
-	MOVW BX, (AX)
-	ADDQ $0x02, AX
+	SHLL $0x02, SI
+	ORL  $0x01, SI
+	MOVW SI, (CX)
+	ADDQ $0x02, CX
 	JMP  repeat_end_emit_encodeBlockAsm12B
 
 repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short_2b:
-	XORQ DI, DI
-	LEAL 1(DI)(BX*4), BX
-	MOVB SI, 1(AX)
-	SARL $0x08, SI
-	SHLL $0x05, SI
-	ORL  SI, BX
-	MOVB BL, (AX)
-	ADDQ $0x02, AX
+	XORQ R8, R8
+	LEAL 1(R8)(SI*4), SI
+	MOVB DI, 1(CX)
+	SARL $0x08, DI
+	SHLL $0x05, DI
+	ORL  DI, SI
+	MOVB SI, (CX)
+	ADDQ $0x02, CX
 	JMP  repeat_end_emit_encodeBlockAsm12B
 
 long_offset_short_repeat_as_copy_encodeBlockAsm12B:
-	MOVB $0xee, (AX)
-	MOVW SI, 1(AX)
-	LEAL -60(BX), BX
-	ADDQ $0x03, AX
+	MOVB $0xee, (CX)
+	MOVW DI, 1(CX)
+	LEAL -60(SI), SI
+	ADDQ $0x03, CX
 
 	// emitRepeat
-	MOVL BX, DI
-	LEAL -4(BX), BX
-	CMPL DI, $0x08
+	MOVL SI, R8
+	LEAL -4(SI), SI
+	CMPL R8, $0x08
 	JBE  repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
-	CMPL DI, $0x0c
+	CMPL R8, $0x0c
 	JAE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
-	CMPL SI, $0x00000800
+	CMPL DI, $0x00000800
 	JB   repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
 
 cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
-	CMPL BX, $0x00000104
+	CMPL SI, $0x00000104
 	JB   repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short
-	LEAL -256(BX), BX
-	MOVW $0x0019, (AX)
-	MOVW BX, 2(AX)
-	ADDQ $0x04, AX
+	LEAL -256(SI), SI
+	MOVW $0x0019, (CX)
+	MOVW SI, 2(CX)
+	ADDQ $0x04, CX
 	JMP  repeat_end_emit_encodeBlockAsm12B
 
 repeat_three_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
-	LEAL -4(BX), BX
-	MOVW $0x0015, (AX)
-	MOVB BL, 2(AX)
-	ADDQ $0x03, AX
+	LEAL -4(SI), SI
+	MOVW $0x0015, (CX)
+	MOVB SI, 2(CX)
+	ADDQ $0x03, CX
 	JMP  repeat_end_emit_encodeBlockAsm12B
 
 repeat_two_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
-	SHLL $0x02, BX
-	ORL  $0x01, BX
-	MOVW BX, (AX)
-	ADDQ $0x02, AX
+	SHLL $0x02, SI
+	ORL  $0x01, SI
+	MOVW SI, (CX)
+	ADDQ $0x02, CX
 	JMP  repeat_end_emit_encodeBlockAsm12B
 
 repeat_two_offset_repeat_as_copy_encodeBlockAsm12B_emit_copy_short:
-	XORQ DI, DI
-	LEAL 1(DI)(BX*4), BX
-	MOVB SI, 1(AX)
-	SARL $0x08, SI
-	SHLL $0x05, SI
-	ORL  SI, BX
-	MOVB BL, (AX)
-	ADDQ $0x02, AX
+	XORQ R8, R8
+	LEAL 1(R8)(SI*4), SI
+	MOVB DI, 1(CX)
+	SARL $0x08, DI
+	SHLL $0x05, DI
+	ORL  DI, SI
+	MOVB SI, (CX)
+	ADDQ $0x02, CX
 	JMP  repeat_end_emit_encodeBlockAsm12B
 
 two_byte_offset_short_repeat_as_copy_encodeBlockAsm12B:
-	MOVL BX, DI
-	SHLL $0x02, DI
-	CMPL BX, $0x0c
+	MOVL SI, R8
+	SHLL $0x02, R8
+	CMPL SI, $0x0c
 	JAE  emit_copy_three_repeat_as_copy_encodeBlockAsm12B
-	CMPL SI, $0x00000800
+	CMPL DI, $0x00000800
 	JAE  emit_copy_three_repeat_as_copy_encodeBlockAsm12B
-	LEAL -15(DI), DI
-	MOVB SI, 1(AX)
-	SHRL $0x08, SI
-	SHLL $0x05, SI
-	ORL  SI, DI
-	MOVB DI, (AX)
-	ADDQ $0x02, AX
+	LEAL -15(R8), R8
+	MOVB DI, 1(CX)
+	SHRL $0x08, DI
+	SHLL $0x05, DI
+	ORL  DI, R8
+	MOVB R8, (CX)
+	ADDQ $0x02, CX
 	JMP  repeat_end_emit_encodeBlockAsm12B
 
 emit_copy_three_repeat_as_copy_encodeBlockAsm12B:
-	LEAL -2(DI), DI
-	MOVB DI, (AX)
-	MOVW SI, 1(AX)
-	ADDQ $0x03, AX
+	LEAL -2(R8), R8
+	MOVB R8, (CX)
+	MOVW DI, 1(CX)
+	ADDQ $0x03, CX
 
 repeat_end_emit_encodeBlockAsm12B:
-	MOVL CX, 12(SP)
+	MOVL DX, 12(SP)
 	JMP  search_loop_encodeBlockAsm12B
 
 no_repeat_found_encodeBlockAsm12B:
-	CMPL (DX)(BX*1), SI
+	CMPL (BX)(SI*1), DI
 	JEQ  candidate_match_encodeBlockAsm12B
-	SHRQ $0x08, SI
-	MOVL 24(SP)(R9*4), BX
-	LEAL 2(CX), R8
-	CMPL (DX)(DI*1), SI
+	SHRQ $0x08, DI
+	MOVL (AX)(R10*4), SI
+	LEAL 2(DX), R9
+	CMPL (BX)(R8*1), DI
 	JEQ  candidate2_match_encodeBlockAsm12B
-	MOVL R8, 24(SP)(R9*4)
-	SHRQ $0x08, SI
-	CMPL (DX)(BX*1), SI
+	MOVL R9, (AX)(R10*4)
+	SHRQ $0x08, DI
+	CMPL (BX)(SI*1), DI
 	JEQ  candidate3_match_encodeBlockAsm12B
-	MOVL 20(SP), CX
+	MOVL 20(SP), DX
 	JMP  search_loop_encodeBlockAsm12B
 
 candidate3_match_encodeBlockAsm12B:
-	ADDL $0x02, CX
+	ADDL $0x02, DX
 	JMP  candidate_match_encodeBlockAsm12B
 
 candidate2_match_encodeBlockAsm12B:
-	MOVL R8, 24(SP)(R9*4)
-	INCL CX
-	MOVL DI, BX
+	MOVL R9, (AX)(R10*4)
+	INCL DX
+	MOVL R8, SI
 
 candidate_match_encodeBlockAsm12B:
-	MOVL  12(SP), SI
-	TESTL BX, BX
+	MOVL  12(SP), DI
+	TESTL SI, SI
 	JZ    match_extend_back_end_encodeBlockAsm12B
 
 match_extend_back_loop_encodeBlockAsm12B:
-	CMPL CX, SI
+	CMPL DX, DI
 	JBE  match_extend_back_end_encodeBlockAsm12B
-	MOVB -1(DX)(BX*1), DI
-	MOVB -1(DX)(CX*1), R8
-	CMPB DI, R8
+	MOVB -1(BX)(SI*1), R8
+	MOVB -1(BX)(DX*1), R9
+	CMPB R8, R9
 	JNE  match_extend_back_end_encodeBlockAsm12B
-	LEAL -1(CX), CX
-	DECL BX
+	LEAL -1(DX), DX
+	DECL SI
 	JZ   match_extend_back_end_encodeBlockAsm12B
 	JMP  match_extend_back_loop_encodeBlockAsm12B
 
 match_extend_back_end_encodeBlockAsm12B:
-	MOVL CX, SI
-	SUBL 12(SP), SI
-	LEAQ 3(AX)(SI*1), SI
-	CMPQ SI, (SP)
+	MOVL DX, DI
+	SUBL 12(SP), DI
+	LEAQ 3(CX)(DI*1), DI
+	CMPQ DI, (SP)
 	JB   match_dst_size_check_encodeBlockAsm12B
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 match_dst_size_check_encodeBlockAsm12B:
-	MOVL CX, SI
-	MOVL 12(SP), DI
-	CMPL DI, SI
+	MOVL DX, DI
+	MOVL 12(SP), R8
+	CMPL R8, DI
 	JEQ  emit_literal_done_match_emit_encodeBlockAsm12B
-	MOVL SI, R8
-	MOVL SI, 12(SP)
-	LEAQ (DX)(DI*1), SI
-	SUBL DI, R8
-	LEAL -1(R8), DI
-	CMPL DI, $0x3c
+	MOVL DI, R9
+	MOVL DI, 12(SP)
+	LEAQ (BX)(R8*1), DI
+	SUBL R8, R9
+	LEAL -1(R9), R8
+	CMPL R8, $0x3c
 	JB   one_byte_match_emit_encodeBlockAsm12B
-	CMPL DI, $0x00000100
+	CMPL R8, $0x00000100
 	JB   two_bytes_match_emit_encodeBlockAsm12B
 	JB   three_bytes_match_emit_encodeBlockAsm12B
 
 three_bytes_match_emit_encodeBlockAsm12B:
-	MOVB $0xf4, (AX)
-	MOVW DI, 1(AX)
-	ADDQ $0x03, AX
+	MOVB $0xf4, (CX)
+	MOVW R8, 1(CX)
+	ADDQ $0x03, CX
 	JMP  memmove_long_match_emit_encodeBlockAsm12B
 
 two_bytes_match_emit_encodeBlockAsm12B:
-	MOVB $0xf0, (AX)
-	MOVB DI, 1(AX)
-	ADDQ $0x02, AX
-	CMPL DI, $0x40
+	MOVB $0xf0, (CX)
+	MOVB R8, 1(CX)
+	ADDQ $0x02, CX
+	CMPL R8, $0x40
 	JB   memmove_match_emit_encodeBlockAsm12B
 	JMP  memmove_long_match_emit_encodeBlockAsm12B
 
 one_byte_match_emit_encodeBlockAsm12B:
-	SHLB $0x02, DI
-	MOVB DI, (AX)
-	ADDQ $0x01, AX
+	SHLB $0x02, R8
+	MOVB R8, (CX)
+	ADDQ $0x01, CX
 
 memmove_match_emit_encodeBlockAsm12B:
-	LEAQ (AX)(R8*1), DI
+	LEAQ (CX)(R9*1), R8
 
 	// genMemMoveShort
-	CMPQ R8, $0x08
+	CMPQ R9, $0x08
 	JBE  emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8
-	CMPQ R8, $0x10
+	CMPQ R9, $0x10
 	JBE  emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8through16
-	CMPQ R8, $0x20
+	CMPQ R9, $0x20
 	JBE  emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_17through32
 	JMP  emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_33through64
 
 emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8:
-	MOVQ (SI), R9
-	MOVQ R9, (AX)
+	MOVQ (DI), R10
+	MOVQ R10, (CX)
 	JMP  memmove_end_copy_match_emit_encodeBlockAsm12B
 
 emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_8through16:
-	MOVQ (SI), R9
-	MOVQ -8(SI)(R8*1), SI
-	MOVQ R9, (AX)
-	MOVQ SI, -8(AX)(R8*1)
+	MOVQ (DI), R10
+	MOVQ -8(DI)(R9*1), DI
+	MOVQ R10, (CX)
+	MOVQ DI, -8(CX)(R9*1)
 	JMP  memmove_end_copy_match_emit_encodeBlockAsm12B
 
 emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_17through32:
-	MOVOU (SI), X0
-	MOVOU -16(SI)(R8*1), X1
-	MOVOU X0, (AX)
-	MOVOU X1, -16(AX)(R8*1)
+	MOVOU (DI), X0
+	MOVOU -16(DI)(R9*1), X1
+	MOVOU X0, (CX)
+	MOVOU X1, -16(CX)(R9*1)
 	JMP   memmove_end_copy_match_emit_encodeBlockAsm12B
 
 emit_lit_memmove_match_emit_encodeBlockAsm12B_memmove_move_33through64:
-	MOVOU (SI), X0
-	MOVOU 16(SI), X1
-	MOVOU -32(SI)(R8*1), X2
-	MOVOU -16(SI)(R8*1), X3
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(R8*1)
-	MOVOU X3, -16(AX)(R8*1)
+	MOVOU (DI), X0
+	MOVOU 16(DI), X1
+	MOVOU -32(DI)(R9*1), X2
+	MOVOU -16(DI)(R9*1), X3
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R9*1)
+	MOVOU X3, -16(CX)(R9*1)
 
 memmove_end_copy_match_emit_encodeBlockAsm12B:
-	MOVQ DI, AX
+	MOVQ R8, CX
 	JMP  emit_literal_done_match_emit_encodeBlockAsm12B
 
 memmove_long_match_emit_encodeBlockAsm12B:
-	LEAQ (AX)(R8*1), DI
+	LEAQ (CX)(R9*1), R8
 
 	// genMemMoveLong
-	MOVOU (SI), X0
-	MOVOU 16(SI), X1
-	MOVOU -32(SI)(R8*1), X2
-	MOVOU -16(SI)(R8*1), X3
-	MOVQ  R8, R10
-	SHRQ  $0x05, R10
-	MOVQ  AX, R9
-	ANDL  $0x0000001f, R9
-	MOVQ  $0x00000040, R11
-	SUBQ  R9, R11
-	DECQ  R10
+	MOVOU (DI), X0
+	MOVOU 16(DI), X1
+	MOVOU -32(DI)(R9*1), X2
+	MOVOU -16(DI)(R9*1), X3
+	MOVQ  R9, R11
+	SHRQ  $0x05, R11
+	MOVQ  CX, R10
+	ANDL  $0x0000001f, R10
+	MOVQ  $0x00000040, R12
+	SUBQ  R10, R12
+	DECQ  R11
 	JA    emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32
-	LEAQ  -32(SI)(R11*1), R9
-	LEAQ  -32(AX)(R11*1), R12
+	LEAQ  -32(DI)(R12*1), R10
+	LEAQ  -32(CX)(R12*1), R13
 
 emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_big_loop_back:
-	MOVOU (R9), X4
-	MOVOU 16(R9), X5
-	MOVOA X4, (R12)
-	MOVOA X5, 16(R12)
+	MOVOU (R10), X4
+	MOVOU 16(R10), X5
+	MOVOA X4, (R13)
+	MOVOA X5, 16(R13)
+	ADDQ  $0x20, R13
+	ADDQ  $0x20, R10
 	ADDQ  $0x20, R12
-	ADDQ  $0x20, R9
-	ADDQ  $0x20, R11
-	DECQ  R10
+	DECQ  R11
 	JNA   emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_big_loop_back
 
 emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32:
-	MOVOU -32(SI)(R11*1), X4
-	MOVOU -16(SI)(R11*1), X5
-	MOVOA X4, -32(AX)(R11*1)
-	MOVOA X5, -16(AX)(R11*1)
-	ADDQ  $0x20, R11
-	CMPQ  R8, R11
+	MOVOU -32(DI)(R12*1), X4
+	MOVOU -16(DI)(R12*1), X5
+	MOVOA X4, -32(CX)(R12*1)
+	MOVOA X5, -16(CX)(R12*1)
+	ADDQ  $0x20, R12
+	CMPQ  R9, R12
 	JAE   emit_lit_memmove_long_match_emit_encodeBlockAsm12Blarge_forward_sse_loop_32
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(R8*1)
-	MOVOU X3, -16(AX)(R8*1)
-	MOVQ  DI, AX
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R9*1)
+	MOVOU X3, -16(CX)(R9*1)
+	MOVQ  R8, CX
 
 emit_literal_done_match_emit_encodeBlockAsm12B:
 match_nolit_loop_encodeBlockAsm12B:
-	MOVL CX, SI
-	SUBL BX, SI
-	MOVL SI, 16(SP)
-	ADDL $0x04, CX
-	ADDL $0x04, BX
-	MOVQ src_len+32(FP), SI
-	SUBL CX, SI
-	LEAQ (DX)(CX*1), DI
-	LEAQ (DX)(BX*1), BX
+	MOVL DX, DI
+	SUBL SI, DI
+	MOVL DI, 16(SP)
+	ADDL $0x04, DX
+	ADDL $0x04, SI
+	MOVQ src_len+32(FP), DI
+	SUBL DX, DI
+	LEAQ (BX)(DX*1), R8
+	LEAQ (BX)(SI*1), SI
 
 	// matchLen
-	XORL R9, R9
+	XORL R10, R10
 
 matchlen_loopback_16_match_nolit_encodeBlockAsm12B:
-	CMPL SI, $0x10
+	CMPL DI, $0x10
 	JB   matchlen_match8_match_nolit_encodeBlockAsm12B
-	MOVQ (DI)(R9*1), R8
-	MOVQ 8(DI)(R9*1), R10
-	XORQ (BX)(R9*1), R8
+	MOVQ (R8)(R10*1), R9
+	MOVQ 8(R8)(R10*1), R11
+	XORQ (SI)(R10*1), R9
 	JNZ  matchlen_bsf_8_match_nolit_encodeBlockAsm12B
-	XORQ 8(BX)(R9*1), R10
+	XORQ 8(SI)(R10*1), R11
 	JNZ  matchlen_bsf_16match_nolit_encodeBlockAsm12B
-	LEAL -16(SI), SI
-	LEAL 16(R9), R9
+	LEAL -16(DI), DI
+	LEAL 16(R10), R10
 	JMP  matchlen_loopback_16_match_nolit_encodeBlockAsm12B
 
 matchlen_bsf_16match_nolit_encodeBlockAsm12B:
 #ifdef GOAMD64_v3
-	TZCNTQ R10, R10
+	TZCNTQ R11, R11
 
 #else
-	BSFQ R10, R10
+	BSFQ R11, R11
 
 #endif
-	SARQ $0x03, R10
-	LEAL 8(R9)(R10*1), R9
+	SARQ $0x03, R11
+	LEAL 8(R10)(R11*1), R10
 	JMP  match_nolit_end_encodeBlockAsm12B
 
 matchlen_match8_match_nolit_encodeBlockAsm12B:
-	CMPL SI, $0x08
+	CMPL DI, $0x08
 	JB   matchlen_match4_match_nolit_encodeBlockAsm12B
-	MOVQ (DI)(R9*1), R8
-	XORQ (BX)(R9*1), R8
+	MOVQ (R8)(R10*1), R9
+	XORQ (SI)(R10*1), R9
 	JNZ  matchlen_bsf_8_match_nolit_encodeBlockAsm12B
-	LEAL -8(SI), SI
-	LEAL 8(R9), R9
+	LEAL -8(DI), DI
+	LEAL 8(R10), R10
 	JMP  matchlen_match4_match_nolit_encodeBlockAsm12B
 
 matchlen_bsf_8_match_nolit_encodeBlockAsm12B:
 #ifdef GOAMD64_v3
-	TZCNTQ R8, R8
+	TZCNTQ R9, R9
 
 #else
-	BSFQ R8, R8
+	BSFQ R9, R9
 
 #endif
-	SARQ $0x03, R8
-	LEAL (R9)(R8*1), R9
+	SARQ $0x03, R9
+	LEAL (R10)(R9*1), R10
 	JMP  match_nolit_end_encodeBlockAsm12B
 
 matchlen_match4_match_nolit_encodeBlockAsm12B:
-	CMPL SI, $0x04
+	CMPL DI, $0x04
 	JB   matchlen_match2_match_nolit_encodeBlockAsm12B
-	MOVL (DI)(R9*1), R8
-	CMPL (BX)(R9*1), R8
+	MOVL (R8)(R10*1), R9
+	CMPL (SI)(R10*1), R9
 	JNE  matchlen_match2_match_nolit_encodeBlockAsm12B
-	LEAL -4(SI), SI
-	LEAL 4(R9), R9
+	LEAL -4(DI), DI
+	LEAL 4(R10), R10
 
 matchlen_match2_match_nolit_encodeBlockAsm12B:
-	CMPL SI, $0x01
+	CMPL DI, $0x01
 	JE   matchlen_match1_match_nolit_encodeBlockAsm12B
 	JB   match_nolit_end_encodeBlockAsm12B
-	MOVW (DI)(R9*1), R8
-	CMPW (BX)(R9*1), R8
+	MOVW (R8)(R10*1), R9
+	CMPW (SI)(R10*1), R9
 	JNE  matchlen_match1_match_nolit_encodeBlockAsm12B
-	LEAL 2(R9), R9
-	SUBL $0x02, SI
+	LEAL 2(R10), R10
+	SUBL $0x02, DI
 	JZ   match_nolit_end_encodeBlockAsm12B
 
 matchlen_match1_match_nolit_encodeBlockAsm12B:
-	MOVB (DI)(R9*1), R8
-	CMPB (BX)(R9*1), R8
+	MOVB (R8)(R10*1), R9
+	CMPB (SI)(R10*1), R9
 	JNE  match_nolit_end_encodeBlockAsm12B
-	LEAL 1(R9), R9
+	LEAL 1(R10), R10
 
 match_nolit_end_encodeBlockAsm12B:
-	ADDL R9, CX
-	MOVL 16(SP), BX
-	ADDL $0x04, R9
-	MOVL CX, 12(SP)
+	ADDL R10, DX
+	MOVL 16(SP), SI
+	ADDL $0x04, R10
+	MOVL DX, 12(SP)
 
 	// emitCopy
-	CMPL R9, $0x40
+	CMPL R10, $0x40
 	JBE  two_byte_offset_short_match_nolit_encodeBlockAsm12B
-	CMPL BX, $0x00000800
+	CMPL SI, $0x00000800
 	JAE  long_offset_short_match_nolit_encodeBlockAsm12B
-	MOVL $0x00000001, SI
-	LEAL 16(SI), SI
-	MOVB BL, 1(AX)
-	SHRL $0x08, BX
-	SHLL $0x05, BX
-	ORL  BX, SI
-	MOVB SI, (AX)
-	ADDQ $0x02, AX
-	SUBL $0x08, R9
+	MOVL $0x00000001, DI
+	LEAL 16(DI), DI
+	MOVB SI, 1(CX)
+	SHRL $0x08, SI
+	SHLL $0x05, SI
+	ORL  SI, DI
+	MOVB DI, (CX)
+	ADDQ $0x02, CX
+	SUBL $0x08, R10
 
 	// emitRepeat
-	LEAL -4(R9), R9
+	LEAL -4(R10), R10
 	JMP  cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b
-	MOVL R9, SI
-	LEAL -4(R9), R9
-	CMPL SI, $0x08
+	MOVL R10, DI
+	LEAL -4(R10), R10
+	CMPL DI, $0x08
 	JBE  repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short_2b
-	CMPL SI, $0x0c
+	CMPL DI, $0x0c
 	JAE  cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b
-	CMPL BX, $0x00000800
+	CMPL SI, $0x00000800
 	JB   repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b
 
 cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b:
-	CMPL R9, $0x00000104
+	CMPL R10, $0x00000104
 	JB   repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short_2b
-	LEAL -256(R9), R9
-	MOVW $0x0019, (AX)
-	MOVW R9, 2(AX)
-	ADDQ $0x04, AX
+	LEAL -256(R10), R10
+	MOVW $0x0019, (CX)
+	MOVW R10, 2(CX)
+	ADDQ $0x04, CX
 	JMP  match_nolit_emitcopy_end_encodeBlockAsm12B
 
 repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short_2b:
-	LEAL -4(R9), R9
-	MOVW $0x0015, (AX)
-	MOVB R9, 2(AX)
-	ADDQ $0x03, AX
+	LEAL -4(R10), R10
+	MOVW $0x0015, (CX)
+	MOVB R10, 2(CX)
+	ADDQ $0x03, CX
 	JMP  match_nolit_emitcopy_end_encodeBlockAsm12B
 
 repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short_2b:
-	SHLL $0x02, R9
-	ORL  $0x01, R9
-	MOVW R9, (AX)
-	ADDQ $0x02, AX
+	SHLL $0x02, R10
+	ORL  $0x01, R10
+	MOVW R10, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeBlockAsm12B
 
 repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short_2b:
-	XORQ SI, SI
-	LEAL 1(SI)(R9*4), R9
-	MOVB BL, 1(AX)
-	SARL $0x08, BX
-	SHLL $0x05, BX
-	ORL  BX, R9
-	MOVB R9, (AX)
-	ADDQ $0x02, AX
+	XORQ DI, DI
+	LEAL 1(DI)(R10*4), R10
+	MOVB SI, 1(CX)
+	SARL $0x08, SI
+	SHLL $0x05, SI
+	ORL  SI, R10
+	MOVB R10, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeBlockAsm12B
 
 long_offset_short_match_nolit_encodeBlockAsm12B:
-	MOVB $0xee, (AX)
-	MOVW BX, 1(AX)
-	LEAL -60(R9), R9
-	ADDQ $0x03, AX
+	MOVB $0xee, (CX)
+	MOVW SI, 1(CX)
+	LEAL -60(R10), R10
+	ADDQ $0x03, CX
 
 	// emitRepeat
-	MOVL R9, SI
-	LEAL -4(R9), R9
-	CMPL SI, $0x08
+	MOVL R10, DI
+	LEAL -4(R10), R10
+	CMPL DI, $0x08
 	JBE  repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short
-	CMPL SI, $0x0c
+	CMPL DI, $0x0c
 	JAE  cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short
-	CMPL BX, $0x00000800
+	CMPL SI, $0x00000800
 	JB   repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short
 
 cant_repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short:
-	CMPL R9, $0x00000104
+	CMPL R10, $0x00000104
 	JB   repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short
-	LEAL -256(R9), R9
-	MOVW $0x0019, (AX)
-	MOVW R9, 2(AX)
-	ADDQ $0x04, AX
+	LEAL -256(R10), R10
+	MOVW $0x0019, (CX)
+	MOVW R10, 2(CX)
+	ADDQ $0x04, CX
 	JMP  match_nolit_emitcopy_end_encodeBlockAsm12B
 
 repeat_three_match_nolit_encodeBlockAsm12B_emit_copy_short:
-	LEAL -4(R9), R9
-	MOVW $0x0015, (AX)
-	MOVB R9, 2(AX)
-	ADDQ $0x03, AX
+	LEAL -4(R10), R10
+	MOVW $0x0015, (CX)
+	MOVB R10, 2(CX)
+	ADDQ $0x03, CX
 	JMP  match_nolit_emitcopy_end_encodeBlockAsm12B
 
 repeat_two_match_nolit_encodeBlockAsm12B_emit_copy_short:
-	SHLL $0x02, R9
-	ORL  $0x01, R9
-	MOVW R9, (AX)
-	ADDQ $0x02, AX
+	SHLL $0x02, R10
+	ORL  $0x01, R10
+	MOVW R10, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeBlockAsm12B
 
 repeat_two_offset_match_nolit_encodeBlockAsm12B_emit_copy_short:
-	XORQ SI, SI
-	LEAL 1(SI)(R9*4), R9
-	MOVB BL, 1(AX)
-	SARL $0x08, BX
-	SHLL $0x05, BX
-	ORL  BX, R9
-	MOVB R9, (AX)
-	ADDQ $0x02, AX
-	JMP  match_nolit_emitcopy_end_encodeBlockAsm12B
-
+	XORQ DI, DI
+	LEAL 1(DI)(R10*4), R10
+	MOVB SI, 1(CX)
+	SARL $0x08, SI
+	SHLL $0x05, SI
+	ORL  SI, R10
+	MOVB R10, (CX)
+	ADDQ $0x02, CX
+	JMP  match_nolit_emitcopy_end_encodeBlockAsm12B
+
 two_byte_offset_short_match_nolit_encodeBlockAsm12B:
-	MOVL R9, SI
-	SHLL $0x02, SI
-	CMPL R9, $0x0c
+	MOVL R10, DI
+	SHLL $0x02, DI
+	CMPL R10, $0x0c
 	JAE  emit_copy_three_match_nolit_encodeBlockAsm12B
-	CMPL BX, $0x00000800
+	CMPL SI, $0x00000800
 	JAE  emit_copy_three_match_nolit_encodeBlockAsm12B
-	LEAL -15(SI), SI
-	MOVB BL, 1(AX)
-	SHRL $0x08, BX
-	SHLL $0x05, BX
-	ORL  BX, SI
-	MOVB SI, (AX)
-	ADDQ $0x02, AX
+	LEAL -15(DI), DI
+	MOVB SI, 1(CX)
+	SHRL $0x08, SI
+	SHLL $0x05, SI
+	ORL  SI, DI
+	MOVB DI, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeBlockAsm12B
 
 emit_copy_three_match_nolit_encodeBlockAsm12B:
-	LEAL -2(SI), SI
-	MOVB SI, (AX)
-	MOVW BX, 1(AX)
-	ADDQ $0x03, AX
+	LEAL -2(DI), DI
+	MOVB DI, (CX)
+	MOVW SI, 1(CX)
+	ADDQ $0x03, CX
 
 match_nolit_emitcopy_end_encodeBlockAsm12B:
-	CMPL CX, 8(SP)
+	CMPL DX, 8(SP)
 	JAE  emit_remainder_encodeBlockAsm12B
-	MOVQ -2(DX)(CX*1), SI
-	CMPQ AX, (SP)
+	MOVQ -2(BX)(DX*1), DI
+	CMPQ CX, (SP)
 	JB   match_nolit_dst_ok_encodeBlockAsm12B
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 match_nolit_dst_ok_encodeBlockAsm12B:
-	MOVQ  $0x000000cf1bbcdcbb, R8
-	MOVQ  SI, DI
-	SHRQ  $0x10, SI
-	MOVQ  SI, BX
-	SHLQ  $0x18, DI
-	IMULQ R8, DI
-	SHRQ  $0x34, DI
-	SHLQ  $0x18, BX
-	IMULQ R8, BX
-	SHRQ  $0x34, BX
-	LEAL  -2(CX), R8
-	LEAQ  24(SP)(BX*4), R9
-	MOVL  (R9), BX
-	MOVL  R8, 24(SP)(DI*4)
-	MOVL  CX, (R9)
-	CMPL  (DX)(BX*1), SI
+	MOVQ  $0x000000cf1bbcdcbb, R9
+	MOVQ  DI, R8
+	SHRQ  $0x10, DI
+	MOVQ  DI, SI
+	SHLQ  $0x18, R8
+	IMULQ R9, R8
+	SHRQ  $0x34, R8
+	SHLQ  $0x18, SI
+	IMULQ R9, SI
+	SHRQ  $0x34, SI
+	LEAL  -2(DX), R9
+	LEAQ  (AX)(SI*4), R10
+	MOVL  (R10), SI
+	MOVL  R9, (AX)(R8*4)
+	MOVL  DX, (R10)
+	CMPL  (BX)(SI*1), DI
 	JEQ   match_nolit_loop_encodeBlockAsm12B
-	INCL  CX
+	INCL  DX
 	JMP   search_loop_encodeBlockAsm12B
 
 emit_remainder_encodeBlockAsm12B:
-	MOVQ src_len+32(FP), CX
-	SUBL 12(SP), CX
-	LEAQ 3(AX)(CX*1), CX
-	CMPQ CX, (SP)
+	MOVQ src_len+32(FP), AX
+	SUBL 12(SP), AX
+	LEAQ 3(CX)(AX*1), AX
+	CMPQ AX, (SP)
 	JB   emit_remainder_ok_encodeBlockAsm12B
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 emit_remainder_ok_encodeBlockAsm12B:
-	MOVQ src_len+32(FP), CX
-	MOVL 12(SP), BX
-	CMPL BX, CX
+	MOVQ src_len+32(FP), AX
+	MOVL 12(SP), DX
+	CMPL DX, AX
 	JEQ  emit_literal_done_emit_remainder_encodeBlockAsm12B
-	MOVL CX, SI
-	MOVL CX, 12(SP)
-	LEAQ (DX)(BX*1), CX
-	SUBL BX, SI
+	MOVL AX, SI
+	MOVL AX, 12(SP)
+	LEAQ (BX)(DX*1), AX
+	SUBL DX, SI
 	LEAL -1(SI), DX
 	CMPL DX, $0x3c
 	JB   one_byte_emit_remainder_encodeBlockAsm12B
@@ -3709,26 +3712,26 @@ emit_remainder_ok_encodeBlockAsm12B:
 	JB   three_bytes_emit_remainder_encodeBlockAsm12B
 
 three_bytes_emit_remainder_encodeBlockAsm12B:
-	MOVB $0xf4, (AX)
-	MOVW DX, 1(AX)
-	ADDQ $0x03, AX
+	MOVB $0xf4, (CX)
+	MOVW DX, 1(CX)
+	ADDQ $0x03, CX
 	JMP  memmove_long_emit_remainder_encodeBlockAsm12B
 
 two_bytes_emit_remainder_encodeBlockAsm12B:
-	MOVB $0xf0, (AX)
-	MOVB DL, 1(AX)
-	ADDQ $0x02, AX
+	MOVB $0xf0, (CX)
+	MOVB DL, 1(CX)
+	ADDQ $0x02, CX
 	CMPL DX, $0x40
 	JB   memmove_emit_remainder_encodeBlockAsm12B
 	JMP  memmove_long_emit_remainder_encodeBlockAsm12B
 
 one_byte_emit_remainder_encodeBlockAsm12B:
 	SHLB $0x02, DL
-	MOVB DL, (AX)
-	ADDQ $0x01, AX
+	MOVB DL, (CX)
+	ADDQ $0x01, CX
 
 memmove_emit_remainder_encodeBlockAsm12B:
-	LEAQ (AX)(SI*1), DX
+	LEAQ (CX)(SI*1), DX
 	MOVL SI, BX
 
 	// genMemMoveShort
@@ -3744,73 +3747,73 @@ memmove_emit_remainder_encodeBlockAsm12B:
 	JMP  emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_33through64
 
 emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_1or2:
-	MOVB (CX), SI
-	MOVB -1(CX)(BX*1), CL
-	MOVB SI, (AX)
-	MOVB CL, -1(AX)(BX*1)
+	MOVB (AX), SI
+	MOVB -1(AX)(BX*1), AL
+	MOVB SI, (CX)
+	MOVB AL, -1(CX)(BX*1)
 	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm12B
 
 emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_3:
-	MOVW (CX), SI
-	MOVB 2(CX), CL
-	MOVW SI, (AX)
-	MOVB CL, 2(AX)
+	MOVW (AX), SI
+	MOVB 2(AX), AL
+	MOVW SI, (CX)
+	MOVB AL, 2(CX)
 	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm12B
 
 emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_4through7:
-	MOVL (CX), SI
-	MOVL -4(CX)(BX*1), CX
-	MOVL SI, (AX)
-	MOVL CX, -4(AX)(BX*1)
+	MOVL (AX), SI
+	MOVL -4(AX)(BX*1), AX
+	MOVL SI, (CX)
+	MOVL AX, -4(CX)(BX*1)
 	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm12B
 
 emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_8through16:
-	MOVQ (CX), SI
-	MOVQ -8(CX)(BX*1), CX
-	MOVQ SI, (AX)
-	MOVQ CX, -8(AX)(BX*1)
+	MOVQ (AX), SI
+	MOVQ -8(AX)(BX*1), AX
+	MOVQ SI, (CX)
+	MOVQ AX, -8(CX)(BX*1)
 	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm12B
 
 emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_17through32:
-	MOVOU (CX), X0
-	MOVOU -16(CX)(BX*1), X1
-	MOVOU X0, (AX)
-	MOVOU X1, -16(AX)(BX*1)
+	MOVOU (AX), X0
+	MOVOU -16(AX)(BX*1), X1
+	MOVOU X0, (CX)
+	MOVOU X1, -16(CX)(BX*1)
 	JMP   memmove_end_copy_emit_remainder_encodeBlockAsm12B
 
 emit_lit_memmove_emit_remainder_encodeBlockAsm12B_memmove_move_33through64:
-	MOVOU (CX), X0
-	MOVOU 16(CX), X1
-	MOVOU -32(CX)(BX*1), X2
-	MOVOU -16(CX)(BX*1), X3
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(BX*1)
-	MOVOU X3, -16(AX)(BX*1)
+	MOVOU (AX), X0
+	MOVOU 16(AX), X1
+	MOVOU -32(AX)(BX*1), X2
+	MOVOU -16(AX)(BX*1), X3
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(BX*1)
+	MOVOU X3, -16(CX)(BX*1)
 
 memmove_end_copy_emit_remainder_encodeBlockAsm12B:
-	MOVQ DX, AX
+	MOVQ DX, CX
 	JMP  emit_literal_done_emit_remainder_encodeBlockAsm12B
 
 memmove_long_emit_remainder_encodeBlockAsm12B:
-	LEAQ (AX)(SI*1), DX
+	LEAQ (CX)(SI*1), DX
 	MOVL SI, BX
 
 	// genMemMoveLong
-	MOVOU (CX), X0
-	MOVOU 16(CX), X1
-	MOVOU -32(CX)(BX*1), X2
-	MOVOU -16(CX)(BX*1), X3
+	MOVOU (AX), X0
+	MOVOU 16(AX), X1
+	MOVOU -32(AX)(BX*1), X2
+	MOVOU -16(AX)(BX*1), X3
 	MOVQ  BX, DI
 	SHRQ  $0x05, DI
-	MOVQ  AX, SI
+	MOVQ  CX, SI
 	ANDL  $0x0000001f, SI
 	MOVQ  $0x00000040, R8
 	SUBQ  SI, R8
 	DECQ  DI
 	JA    emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_forward_sse_loop_32
-	LEAQ  -32(CX)(R8*1), SI
-	LEAQ  -32(AX)(R8*1), R9
+	LEAQ  -32(AX)(R8*1), SI
+	LEAQ  -32(CX)(R8*1), R9
 
 emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_big_loop_back:
 	MOVOU (SI), X4
@@ -3824,967 +3827,968 @@ emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_big_loop_back:
 	JNA   emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_big_loop_back
 
 emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_forward_sse_loop_32:
-	MOVOU -32(CX)(R8*1), X4
-	MOVOU -16(CX)(R8*1), X5
-	MOVOA X4, -32(AX)(R8*1)
-	MOVOA X5, -16(AX)(R8*1)
+	MOVOU -32(AX)(R8*1), X4
+	MOVOU -16(AX)(R8*1), X5
+	MOVOA X4, -32(CX)(R8*1)
+	MOVOA X5, -16(CX)(R8*1)
 	ADDQ  $0x20, R8
 	CMPQ  BX, R8
 	JAE   emit_lit_memmove_long_emit_remainder_encodeBlockAsm12Blarge_forward_sse_loop_32
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(BX*1)
-	MOVOU X3, -16(AX)(BX*1)
-	MOVQ  DX, AX
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(BX*1)
+	MOVOU X3, -16(CX)(BX*1)
+	MOVQ  DX, CX
 
 emit_literal_done_emit_remainder_encodeBlockAsm12B:
-	MOVQ dst_base+0(FP), CX
-	SUBQ CX, AX
-	MOVQ AX, ret+48(FP)
+	MOVQ dst_base+0(FP), AX
+	SUBQ AX, CX
+	MOVQ CX, ret+56(FP)
 	RET
 
-// func encodeBlockAsm10B(dst []byte, src []byte) int
+// func encodeBlockAsm10B(dst []byte, src []byte, tmp *[4096]byte) int
 // Requires: BMI, SSE2
-TEXT ·encodeBlockAsm10B(SB), $4120-56
-	MOVQ dst_base+0(FP), AX
-	MOVQ $0x00000020, CX
-	LEAQ 24(SP), DX
+TEXT ·encodeBlockAsm10B(SB), $24-64
+	MOVQ tmp+48(FP), AX
+	MOVQ dst_base+0(FP), CX
+	MOVQ $0x00000020, DX
+	MOVQ AX, BX
 	PXOR X0, X0
 
 zero_loop_encodeBlockAsm10B:
-	MOVOU X0, (DX)
-	MOVOU X0, 16(DX)
-	MOVOU X0, 32(DX)
-	MOVOU X0, 48(DX)
-	MOVOU X0, 64(DX)
-	MOVOU X0, 80(DX)
-	MOVOU X0, 96(DX)
-	MOVOU X0, 112(DX)
-	ADDQ  $0x80, DX
-	DECQ  CX
+	MOVOU X0, (BX)
+	MOVOU X0, 16(BX)
+	MOVOU X0, 32(BX)
+	MOVOU X0, 48(BX)
+	MOVOU X0, 64(BX)
+	MOVOU X0, 80(BX)
+	MOVOU X0, 96(BX)
+	MOVOU X0, 112(BX)
+	ADDQ  $0x80, BX
+	DECQ  DX
 	JNZ   zero_loop_encodeBlockAsm10B
 	MOVL  $0x00000000, 12(SP)
-	MOVQ  src_len+32(FP), CX
-	LEAQ  -9(CX), DX
-	LEAQ  -8(CX), BX
-	MOVL  BX, 8(SP)
-	SHRQ  $0x05, CX
-	SUBL  CX, DX
-	LEAQ  (AX)(DX*1), DX
-	MOVQ  DX, (SP)
-	MOVL  $0x00000001, CX
-	MOVL  CX, 16(SP)
-	MOVQ  src_base+24(FP), DX
+	MOVQ  src_len+32(FP), DX
+	LEAQ  -9(DX), BX
+	LEAQ  -8(DX), SI
+	MOVL  SI, 8(SP)
+	SHRQ  $0x05, DX
+	SUBL  DX, BX
+	LEAQ  (CX)(BX*1), BX
+	MOVQ  BX, (SP)
+	MOVL  $0x00000001, DX
+	MOVL  DX, 16(SP)
+	MOVQ  src_base+24(FP), BX
 
 search_loop_encodeBlockAsm10B:
-	MOVL  CX, BX
-	SUBL  12(SP), BX
-	SHRL  $0x05, BX
-	LEAL  4(CX)(BX*1), BX
-	CMPL  BX, 8(SP)
+	MOVL  DX, SI
+	SUBL  12(SP), SI
+	SHRL  $0x05, SI
+	LEAL  4(DX)(SI*1), SI
+	CMPL  SI, 8(SP)
 	JAE   emit_remainder_encodeBlockAsm10B
-	MOVQ  (DX)(CX*1), SI
-	MOVL  BX, 20(SP)
-	MOVQ  $0x9e3779b1, R8
-	MOVQ  SI, R9
-	MOVQ  SI, R10
-	SHRQ  $0x08, R10
-	SHLQ  $0x20, R9
-	IMULQ R8, R9
-	SHRQ  $0x36, R9
+	MOVQ  (BX)(DX*1), DI
+	MOVL  SI, 20(SP)
+	MOVQ  $0x9e3779b1, R9
+	MOVQ  DI, R10
+	MOVQ  DI, R11
+	SHRQ  $0x08, R11
 	SHLQ  $0x20, R10
-	IMULQ R8, R10
+	IMULQ R9, R10
 	SHRQ  $0x36, R10
-	MOVL  24(SP)(R9*4), BX
-	MOVL  24(SP)(R10*4), DI
-	MOVL  CX, 24(SP)(R9*4)
-	LEAL  1(CX), R9
-	MOVL  R9, 24(SP)(R10*4)
-	MOVQ  SI, R9
-	SHRQ  $0x10, R9
-	SHLQ  $0x20, R9
-	IMULQ R8, R9
-	SHRQ  $0x36, R9
-	MOVL  CX, R8
-	SUBL  16(SP), R8
-	MOVL  1(DX)(R8*1), R10
-	MOVQ  SI, R8
-	SHRQ  $0x08, R8
-	CMPL  R8, R10
+	SHLQ  $0x20, R11
+	IMULQ R9, R11
+	SHRQ  $0x36, R11
+	MOVL  (AX)(R10*4), SI
+	MOVL  (AX)(R11*4), R8
+	MOVL  DX, (AX)(R10*4)
+	LEAL  1(DX), R10
+	MOVL  R10, (AX)(R11*4)
+	MOVQ  DI, R10
+	SHRQ  $0x10, R10
+	SHLQ  $0x20, R10
+	IMULQ R9, R10
+	SHRQ  $0x36, R10
+	MOVL  DX, R9
+	SUBL  16(SP), R9
+	MOVL  1(BX)(R9*1), R11
+	MOVQ  DI, R9
+	SHRQ  $0x08, R9
+	CMPL  R9, R11
 	JNE   no_repeat_found_encodeBlockAsm10B
-	LEAL  1(CX), SI
-	MOVL  12(SP), DI
-	MOVL  SI, BX
-	SUBL  16(SP), BX
+	LEAL  1(DX), DI
+	MOVL  12(SP), R8
+	MOVL  DI, SI
+	SUBL  16(SP), SI
 	JZ    repeat_extend_back_end_encodeBlockAsm10B
 
 repeat_extend_back_loop_encodeBlockAsm10B:
-	CMPL SI, DI
+	CMPL DI, R8
 	JBE  repeat_extend_back_end_encodeBlockAsm10B
-	MOVB -1(DX)(BX*1), R8
-	MOVB -1(DX)(SI*1), R9
-	CMPB R8, R9
+	MOVB -1(BX)(SI*1), R9
+	MOVB -1(BX)(DI*1), R10
+	CMPB R9, R10
 	JNE  repeat_extend_back_end_encodeBlockAsm10B
-	LEAL -1(SI), SI
-	DECL BX
+	LEAL -1(DI), DI
+	DECL SI
 	JNZ  repeat_extend_back_loop_encodeBlockAsm10B
 
 repeat_extend_back_end_encodeBlockAsm10B:
-	MOVL SI, BX
-	SUBL 12(SP), BX
-	LEAQ 3(AX)(BX*1), BX
-	CMPQ BX, (SP)
+	MOVL DI, SI
+	SUBL 12(SP), SI
+	LEAQ 3(CX)(SI*1), SI
+	CMPQ SI, (SP)
 	JB   repeat_dst_size_check_encodeBlockAsm10B
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 repeat_dst_size_check_encodeBlockAsm10B:
-	MOVL 12(SP), BX
-	CMPL BX, SI
+	MOVL 12(SP), SI
+	CMPL SI, DI
 	JEQ  emit_literal_done_repeat_emit_encodeBlockAsm10B
-	MOVL SI, R8
-	MOVL SI, 12(SP)
-	LEAQ (DX)(BX*1), R9
-	SUBL BX, R8
-	LEAL -1(R8), BX
-	CMPL BX, $0x3c
+	MOVL DI, R9
+	MOVL DI, 12(SP)
+	LEAQ (BX)(SI*1), R10
+	SUBL SI, R9
+	LEAL -1(R9), SI
+	CMPL SI, $0x3c
 	JB   one_byte_repeat_emit_encodeBlockAsm10B
-	CMPL BX, $0x00000100
+	CMPL SI, $0x00000100
 	JB   two_bytes_repeat_emit_encodeBlockAsm10B
 	JB   three_bytes_repeat_emit_encodeBlockAsm10B
 
 three_bytes_repeat_emit_encodeBlockAsm10B:
-	MOVB $0xf4, (AX)
-	MOVW BX, 1(AX)
-	ADDQ $0x03, AX
+	MOVB $0xf4, (CX)
+	MOVW SI, 1(CX)
+	ADDQ $0x03, CX
 	JMP  memmove_long_repeat_emit_encodeBlockAsm10B
 
 two_bytes_repeat_emit_encodeBlockAsm10B:
-	MOVB $0xf0, (AX)
-	MOVB BL, 1(AX)
-	ADDQ $0x02, AX
-	CMPL BX, $0x40
+	MOVB $0xf0, (CX)
+	MOVB SI, 1(CX)
+	ADDQ $0x02, CX
+	CMPL SI, $0x40
 	JB   memmove_repeat_emit_encodeBlockAsm10B
 	JMP  memmove_long_repeat_emit_encodeBlockAsm10B
 
 one_byte_repeat_emit_encodeBlockAsm10B:
-	SHLB $0x02, BL
-	MOVB BL, (AX)
-	ADDQ $0x01, AX
+	SHLB $0x02, SI
+	MOVB SI, (CX)
+	ADDQ $0x01, CX
 
 memmove_repeat_emit_encodeBlockAsm10B:
-	LEAQ (AX)(R8*1), BX
+	LEAQ (CX)(R9*1), SI
 
 	// genMemMoveShort
-	CMPQ R8, $0x08
+	CMPQ R9, $0x08
 	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8
-	CMPQ R8, $0x10
+	CMPQ R9, $0x10
 	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8through16
-	CMPQ R8, $0x20
+	CMPQ R9, $0x20
 	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_17through32
 	JMP  emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_33through64
 
 emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8:
-	MOVQ (R9), R10
-	MOVQ R10, (AX)
+	MOVQ (R10), R11
+	MOVQ R11, (CX)
 	JMP  memmove_end_copy_repeat_emit_encodeBlockAsm10B
 
 emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_8through16:
-	MOVQ (R9), R10
-	MOVQ -8(R9)(R8*1), R9
-	MOVQ R10, (AX)
-	MOVQ R9, -8(AX)(R8*1)
+	MOVQ (R10), R11
+	MOVQ -8(R10)(R9*1), R10
+	MOVQ R11, (CX)
+	MOVQ R10, -8(CX)(R9*1)
 	JMP  memmove_end_copy_repeat_emit_encodeBlockAsm10B
 
 emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_17through32:
-	MOVOU (R9), X0
-	MOVOU -16(R9)(R8*1), X1
-	MOVOU X0, (AX)
-	MOVOU X1, -16(AX)(R8*1)
+	MOVOU (R10), X0
+	MOVOU -16(R10)(R9*1), X1
+	MOVOU X0, (CX)
+	MOVOU X1, -16(CX)(R9*1)
 	JMP   memmove_end_copy_repeat_emit_encodeBlockAsm10B
 
 emit_lit_memmove_repeat_emit_encodeBlockAsm10B_memmove_move_33through64:
-	MOVOU (R9), X0
-	MOVOU 16(R9), X1
-	MOVOU -32(R9)(R8*1), X2
-	MOVOU -16(R9)(R8*1), X3
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(R8*1)
-	MOVOU X3, -16(AX)(R8*1)
+	MOVOU (R10), X0
+	MOVOU 16(R10), X1
+	MOVOU -32(R10)(R9*1), X2
+	MOVOU -16(R10)(R9*1), X3
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R9*1)
+	MOVOU X3, -16(CX)(R9*1)
 
 memmove_end_copy_repeat_emit_encodeBlockAsm10B:
-	MOVQ BX, AX
+	MOVQ SI, CX
 	JMP  emit_literal_done_repeat_emit_encodeBlockAsm10B
 
 memmove_long_repeat_emit_encodeBlockAsm10B:
-	LEAQ (AX)(R8*1), BX
+	LEAQ (CX)(R9*1), SI
 
 	// genMemMoveLong
-	MOVOU (R9), X0
-	MOVOU 16(R9), X1
-	MOVOU -32(R9)(R8*1), X2
-	MOVOU -16(R9)(R8*1), X3
-	MOVQ  R8, R11
-	SHRQ  $0x05, R11
-	MOVQ  AX, R10
-	ANDL  $0x0000001f, R10
-	MOVQ  $0x00000040, R12
-	SUBQ  R10, R12
-	DECQ  R11
+	MOVOU (R10), X0
+	MOVOU 16(R10), X1
+	MOVOU -32(R10)(R9*1), X2
+	MOVOU -16(R10)(R9*1), X3
+	MOVQ  R9, R12
+	SHRQ  $0x05, R12
+	MOVQ  CX, R11
+	ANDL  $0x0000001f, R11
+	MOVQ  $0x00000040, R13
+	SUBQ  R11, R13
+	DECQ  R12
 	JA    emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32
-	LEAQ  -32(R9)(R12*1), R10
-	LEAQ  -32(AX)(R12*1), R13
+	LEAQ  -32(R10)(R13*1), R11
+	LEAQ  -32(CX)(R13*1), R14
 
 emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_big_loop_back:
-	MOVOU (R10), X4
-	MOVOU 16(R10), X5
-	MOVOA X4, (R13)
-	MOVOA X5, 16(R13)
+	MOVOU (R11), X4
+	MOVOU 16(R11), X5
+	MOVOA X4, (R14)
+	MOVOA X5, 16(R14)
+	ADDQ  $0x20, R14
+	ADDQ  $0x20, R11
 	ADDQ  $0x20, R13
-	ADDQ  $0x20, R10
-	ADDQ  $0x20, R12
-	DECQ  R11
+	DECQ  R12
 	JNA   emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_big_loop_back
 
 emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32:
-	MOVOU -32(R9)(R12*1), X4
-	MOVOU -16(R9)(R12*1), X5
-	MOVOA X4, -32(AX)(R12*1)
-	MOVOA X5, -16(AX)(R12*1)
-	ADDQ  $0x20, R12
-	CMPQ  R8, R12
+	MOVOU -32(R10)(R13*1), X4
+	MOVOU -16(R10)(R13*1), X5
+	MOVOA X4, -32(CX)(R13*1)
+	MOVOA X5, -16(CX)(R13*1)
+	ADDQ  $0x20, R13
+	CMPQ  R9, R13
 	JAE   emit_lit_memmove_long_repeat_emit_encodeBlockAsm10Blarge_forward_sse_loop_32
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(R8*1)
-	MOVOU X3, -16(AX)(R8*1)
-	MOVQ  BX, AX
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R9*1)
+	MOVOU X3, -16(CX)(R9*1)
+	MOVQ  SI, CX
 
 emit_literal_done_repeat_emit_encodeBlockAsm10B:
-	ADDL $0x05, CX
-	MOVL CX, BX
-	SUBL 16(SP), BX
-	MOVQ src_len+32(FP), R8
-	SUBL CX, R8
-	LEAQ (DX)(CX*1), R9
-	LEAQ (DX)(BX*1), BX
+	ADDL $0x05, DX
+	MOVL DX, SI
+	SUBL 16(SP), SI
+	MOVQ src_len+32(FP), R9
+	SUBL DX, R9
+	LEAQ (BX)(DX*1), R10
+	LEAQ (BX)(SI*1), SI
 
 	// matchLen
-	XORL R11, R11
+	XORL R12, R12
 
 matchlen_loopback_16_repeat_extend_encodeBlockAsm10B:
-	CMPL R8, $0x10
+	CMPL R9, $0x10
 	JB   matchlen_match8_repeat_extend_encodeBlockAsm10B
-	MOVQ (R9)(R11*1), R10
-	MOVQ 8(R9)(R11*1), R12
-	XORQ (BX)(R11*1), R10
+	MOVQ (R10)(R12*1), R11
+	MOVQ 8(R10)(R12*1), R13
+	XORQ (SI)(R12*1), R11
 	JNZ  matchlen_bsf_8_repeat_extend_encodeBlockAsm10B
-	XORQ 8(BX)(R11*1), R12
+	XORQ 8(SI)(R12*1), R13
 	JNZ  matchlen_bsf_16repeat_extend_encodeBlockAsm10B
-	LEAL -16(R8), R8
-	LEAL 16(R11), R11
+	LEAL -16(R9), R9
+	LEAL 16(R12), R12
 	JMP  matchlen_loopback_16_repeat_extend_encodeBlockAsm10B
 
 matchlen_bsf_16repeat_extend_encodeBlockAsm10B:
 #ifdef GOAMD64_v3
-	TZCNTQ R12, R12
+	TZCNTQ R13, R13
 
 #else
-	BSFQ R12, R12
+	BSFQ R13, R13
 
 #endif
-	SARQ $0x03, R12
-	LEAL 8(R11)(R12*1), R11
+	SARQ $0x03, R13
+	LEAL 8(R12)(R13*1), R12
 	JMP  repeat_extend_forward_end_encodeBlockAsm10B
 
 matchlen_match8_repeat_extend_encodeBlockAsm10B:
-	CMPL R8, $0x08
+	CMPL R9, $0x08
 	JB   matchlen_match4_repeat_extend_encodeBlockAsm10B
-	MOVQ (R9)(R11*1), R10
-	XORQ (BX)(R11*1), R10
+	MOVQ (R10)(R12*1), R11
+	XORQ (SI)(R12*1), R11
 	JNZ  matchlen_bsf_8_repeat_extend_encodeBlockAsm10B
-	LEAL -8(R8), R8
-	LEAL 8(R11), R11
+	LEAL -8(R9), R9
+	LEAL 8(R12), R12
 	JMP  matchlen_match4_repeat_extend_encodeBlockAsm10B
 
 matchlen_bsf_8_repeat_extend_encodeBlockAsm10B:
 #ifdef GOAMD64_v3
-	TZCNTQ R10, R10
+	TZCNTQ R11, R11
 
 #else
-	BSFQ R10, R10
+	BSFQ R11, R11
 
 #endif
-	SARQ $0x03, R10
-	LEAL (R11)(R10*1), R11
+	SARQ $0x03, R11
+	LEAL (R12)(R11*1), R12
 	JMP  repeat_extend_forward_end_encodeBlockAsm10B
 
 matchlen_match4_repeat_extend_encodeBlockAsm10B:
-	CMPL R8, $0x04
+	CMPL R9, $0x04
 	JB   matchlen_match2_repeat_extend_encodeBlockAsm10B
-	MOVL (R9)(R11*1), R10
-	CMPL (BX)(R11*1), R10
+	MOVL (R10)(R12*1), R11
+	CMPL (SI)(R12*1), R11
 	JNE  matchlen_match2_repeat_extend_encodeBlockAsm10B
-	LEAL -4(R8), R8
-	LEAL 4(R11), R11
+	LEAL -4(R9), R9
+	LEAL 4(R12), R12
 
 matchlen_match2_repeat_extend_encodeBlockAsm10B:
-	CMPL R8, $0x01
+	CMPL R9, $0x01
 	JE   matchlen_match1_repeat_extend_encodeBlockAsm10B
 	JB   repeat_extend_forward_end_encodeBlockAsm10B
-	MOVW (R9)(R11*1), R10
-	CMPW (BX)(R11*1), R10
+	MOVW (R10)(R12*1), R11
+	CMPW (SI)(R12*1), R11
 	JNE  matchlen_match1_repeat_extend_encodeBlockAsm10B
-	LEAL 2(R11), R11
-	SUBL $0x02, R8
+	LEAL 2(R12), R12
+	SUBL $0x02, R9
 	JZ   repeat_extend_forward_end_encodeBlockAsm10B
 
 matchlen_match1_repeat_extend_encodeBlockAsm10B:
-	MOVB (R9)(R11*1), R10
-	CMPB (BX)(R11*1), R10
+	MOVB (R10)(R12*1), R11
+	CMPB (SI)(R12*1), R11
 	JNE  repeat_extend_forward_end_encodeBlockAsm10B
-	LEAL 1(R11), R11
+	LEAL 1(R12), R12
 
 repeat_extend_forward_end_encodeBlockAsm10B:
-	ADDL  R11, CX
-	MOVL  CX, BX
-	SUBL  SI, BX
-	MOVL  16(SP), SI
-	TESTL DI, DI
+	ADDL  R12, DX
+	MOVL  DX, SI
+	SUBL  DI, SI
+	MOVL  16(SP), DI
+	TESTL R8, R8
 	JZ    repeat_as_copy_encodeBlockAsm10B
 
 	// emitRepeat
-	MOVL BX, DI
-	LEAL -4(BX), BX
-	CMPL DI, $0x08
+	MOVL SI, R8
+	LEAL -4(SI), SI
+	CMPL R8, $0x08
 	JBE  repeat_two_match_repeat_encodeBlockAsm10B
-	CMPL DI, $0x0c
+	CMPL R8, $0x0c
 	JAE  cant_repeat_two_offset_match_repeat_encodeBlockAsm10B
-	CMPL SI, $0x00000800
+	CMPL DI, $0x00000800
 	JB   repeat_two_offset_match_repeat_encodeBlockAsm10B
 
 cant_repeat_two_offset_match_repeat_encodeBlockAsm10B:
-	CMPL BX, $0x00000104
+	CMPL SI, $0x00000104
 	JB   repeat_three_match_repeat_encodeBlockAsm10B
-	LEAL -256(BX), BX
-	MOVW $0x0019, (AX)
-	MOVW BX, 2(AX)
-	ADDQ $0x04, AX
+	LEAL -256(SI), SI
+	MOVW $0x0019, (CX)
+	MOVW SI, 2(CX)
+	ADDQ $0x04, CX
 	JMP  repeat_end_emit_encodeBlockAsm10B
 
 repeat_three_match_repeat_encodeBlockAsm10B:
-	LEAL -4(BX), BX
-	MOVW $0x0015, (AX)
-	MOVB BL, 2(AX)
-	ADDQ $0x03, AX
+	LEAL -4(SI), SI
+	MOVW $0x0015, (CX)
+	MOVB SI, 2(CX)
+	ADDQ $0x03, CX
 	JMP  repeat_end_emit_encodeBlockAsm10B
 
 repeat_two_match_repeat_encodeBlockAsm10B:
-	SHLL $0x02, BX
-	ORL  $0x01, BX
-	MOVW BX, (AX)
-	ADDQ $0x02, AX
+	SHLL $0x02, SI
+	ORL  $0x01, SI
+	MOVW SI, (CX)
+	ADDQ $0x02, CX
 	JMP  repeat_end_emit_encodeBlockAsm10B
 
 repeat_two_offset_match_repeat_encodeBlockAsm10B:
-	XORQ DI, DI
-	LEAL 1(DI)(BX*4), BX
-	MOVB SI, 1(AX)
-	SARL $0x08, SI
-	SHLL $0x05, SI
-	ORL  SI, BX
-	MOVB BL, (AX)
-	ADDQ $0x02, AX
+	XORQ R8, R8
+	LEAL 1(R8)(SI*4), SI
+	MOVB DI, 1(CX)
+	SARL $0x08, DI
+	SHLL $0x05, DI
+	ORL  DI, SI
+	MOVB SI, (CX)
+	ADDQ $0x02, CX
 	JMP  repeat_end_emit_encodeBlockAsm10B
 
 repeat_as_copy_encodeBlockAsm10B:
 	// emitCopy
-	CMPL BX, $0x40
+	CMPL SI, $0x40
 	JBE  two_byte_offset_short_repeat_as_copy_encodeBlockAsm10B
-	CMPL SI, $0x00000800
+	CMPL DI, $0x00000800
 	JAE  long_offset_short_repeat_as_copy_encodeBlockAsm10B
-	MOVL $0x00000001, DI
-	LEAL 16(DI), DI
-	MOVB SI, 1(AX)
-	SHRL $0x08, SI
-	SHLL $0x05, SI
-	ORL  SI, DI
-	MOVB DI, (AX)
-	ADDQ $0x02, AX
-	SUBL $0x08, BX
+	MOVL $0x00000001, R8
+	LEAL 16(R8), R8
+	MOVB DI, 1(CX)
+	SHRL $0x08, DI
+	SHLL $0x05, DI
+	ORL  DI, R8
+	MOVB R8, (CX)
+	ADDQ $0x02, CX
+	SUBL $0x08, SI
 
 	// emitRepeat
-	LEAL -4(BX), BX
+	LEAL -4(SI), SI
 	JMP  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b
-	MOVL BX, DI
-	LEAL -4(BX), BX
-	CMPL DI, $0x08
+	MOVL SI, R8
+	LEAL -4(SI), SI
+	CMPL R8, $0x08
 	JBE  repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b
-	CMPL DI, $0x0c
+	CMPL R8, $0x0c
 	JAE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b
-	CMPL SI, $0x00000800
+	CMPL DI, $0x00000800
 	JB   repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b
 
 cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b:
-	CMPL BX, $0x00000104
+	CMPL SI, $0x00000104
 	JB   repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b
-	LEAL -256(BX), BX
-	MOVW $0x0019, (AX)
-	MOVW BX, 2(AX)
-	ADDQ $0x04, AX
+	LEAL -256(SI), SI
+	MOVW $0x0019, (CX)
+	MOVW SI, 2(CX)
+	ADDQ $0x04, CX
 	JMP  repeat_end_emit_encodeBlockAsm10B
 
 repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b:
-	LEAL -4(BX), BX
-	MOVW $0x0015, (AX)
-	MOVB BL, 2(AX)
-	ADDQ $0x03, AX
+	LEAL -4(SI), SI
+	MOVW $0x0015, (CX)
+	MOVB SI, 2(CX)
+	ADDQ $0x03, CX
 	JMP  repeat_end_emit_encodeBlockAsm10B
 
 repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b:
-	SHLL $0x02, BX
-	ORL  $0x01, BX
-	MOVW BX, (AX)
-	ADDQ $0x02, AX
+	SHLL $0x02, SI
+	ORL  $0x01, SI
+	MOVW SI, (CX)
+	ADDQ $0x02, CX
 	JMP  repeat_end_emit_encodeBlockAsm10B
 
 repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short_2b:
-	XORQ DI, DI
-	LEAL 1(DI)(BX*4), BX
-	MOVB SI, 1(AX)
-	SARL $0x08, SI
-	SHLL $0x05, SI
-	ORL  SI, BX
-	MOVB BL, (AX)
-	ADDQ $0x02, AX
+	XORQ R8, R8
+	LEAL 1(R8)(SI*4), SI
+	MOVB DI, 1(CX)
+	SARL $0x08, DI
+	SHLL $0x05, DI
+	ORL  DI, SI
+	MOVB SI, (CX)
+	ADDQ $0x02, CX
 	JMP  repeat_end_emit_encodeBlockAsm10B
 
 long_offset_short_repeat_as_copy_encodeBlockAsm10B:
-	MOVB $0xee, (AX)
-	MOVW SI, 1(AX)
-	LEAL -60(BX), BX
-	ADDQ $0x03, AX
+	MOVB $0xee, (CX)
+	MOVW DI, 1(CX)
+	LEAL -60(SI), SI
+	ADDQ $0x03, CX
 
 	// emitRepeat
-	MOVL BX, DI
-	LEAL -4(BX), BX
-	CMPL DI, $0x08
+	MOVL SI, R8
+	LEAL -4(SI), SI
+	CMPL R8, $0x08
 	JBE  repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short
-	CMPL DI, $0x0c
+	CMPL R8, $0x0c
 	JAE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short
-	CMPL SI, $0x00000800
+	CMPL DI, $0x00000800
 	JB   repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short
 
 cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short:
-	CMPL BX, $0x00000104
+	CMPL SI, $0x00000104
 	JB   repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short
-	LEAL -256(BX), BX
-	MOVW $0x0019, (AX)
-	MOVW BX, 2(AX)
-	ADDQ $0x04, AX
+	LEAL -256(SI), SI
+	MOVW $0x0019, (CX)
+	MOVW SI, 2(CX)
+	ADDQ $0x04, CX
 	JMP  repeat_end_emit_encodeBlockAsm10B
 
 repeat_three_repeat_as_copy_encodeBlockAsm10B_emit_copy_short:
-	LEAL -4(BX), BX
-	MOVW $0x0015, (AX)
-	MOVB BL, 2(AX)
-	ADDQ $0x03, AX
+	LEAL -4(SI), SI
+	MOVW $0x0015, (CX)
+	MOVB SI, 2(CX)
+	ADDQ $0x03, CX
 	JMP  repeat_end_emit_encodeBlockAsm10B
 
 repeat_two_repeat_as_copy_encodeBlockAsm10B_emit_copy_short:
-	SHLL $0x02, BX
-	ORL  $0x01, BX
-	MOVW BX, (AX)
-	ADDQ $0x02, AX
+	SHLL $0x02, SI
+	ORL  $0x01, SI
+	MOVW SI, (CX)
+	ADDQ $0x02, CX
 	JMP  repeat_end_emit_encodeBlockAsm10B
 
 repeat_two_offset_repeat_as_copy_encodeBlockAsm10B_emit_copy_short:
-	XORQ DI, DI
-	LEAL 1(DI)(BX*4), BX
-	MOVB SI, 1(AX)
-	SARL $0x08, SI
-	SHLL $0x05, SI
-	ORL  SI, BX
-	MOVB BL, (AX)
-	ADDQ $0x02, AX
+	XORQ R8, R8
+	LEAL 1(R8)(SI*4), SI
+	MOVB DI, 1(CX)
+	SARL $0x08, DI
+	SHLL $0x05, DI
+	ORL  DI, SI
+	MOVB SI, (CX)
+	ADDQ $0x02, CX
 	JMP  repeat_end_emit_encodeBlockAsm10B
 
 two_byte_offset_short_repeat_as_copy_encodeBlockAsm10B:
-	MOVL BX, DI
-	SHLL $0x02, DI
-	CMPL BX, $0x0c
+	MOVL SI, R8
+	SHLL $0x02, R8
+	CMPL SI, $0x0c
 	JAE  emit_copy_three_repeat_as_copy_encodeBlockAsm10B
-	CMPL SI, $0x00000800
+	CMPL DI, $0x00000800
 	JAE  emit_copy_three_repeat_as_copy_encodeBlockAsm10B
-	LEAL -15(DI), DI
-	MOVB SI, 1(AX)
-	SHRL $0x08, SI
-	SHLL $0x05, SI
-	ORL  SI, DI
-	MOVB DI, (AX)
-	ADDQ $0x02, AX
+	LEAL -15(R8), R8
+	MOVB DI, 1(CX)
+	SHRL $0x08, DI
+	SHLL $0x05, DI
+	ORL  DI, R8
+	MOVB R8, (CX)
+	ADDQ $0x02, CX
 	JMP  repeat_end_emit_encodeBlockAsm10B
 
 emit_copy_three_repeat_as_copy_encodeBlockAsm10B:
-	LEAL -2(DI), DI
-	MOVB DI, (AX)
-	MOVW SI, 1(AX)
-	ADDQ $0x03, AX
+	LEAL -2(R8), R8
+	MOVB R8, (CX)
+	MOVW DI, 1(CX)
+	ADDQ $0x03, CX
 
 repeat_end_emit_encodeBlockAsm10B:
-	MOVL CX, 12(SP)
+	MOVL DX, 12(SP)
 	JMP  search_loop_encodeBlockAsm10B
 
 no_repeat_found_encodeBlockAsm10B:
-	CMPL (DX)(BX*1), SI
+	CMPL (BX)(SI*1), DI
 	JEQ  candidate_match_encodeBlockAsm10B
-	SHRQ $0x08, SI
-	MOVL 24(SP)(R9*4), BX
-	LEAL 2(CX), R8
-	CMPL (DX)(DI*1), SI
+	SHRQ $0x08, DI
+	MOVL (AX)(R10*4), SI
+	LEAL 2(DX), R9
+	CMPL (BX)(R8*1), DI
 	JEQ  candidate2_match_encodeBlockAsm10B
-	MOVL R8, 24(SP)(R9*4)
-	SHRQ $0x08, SI
-	CMPL (DX)(BX*1), SI
+	MOVL R9, (AX)(R10*4)
+	SHRQ $0x08, DI
+	CMPL (BX)(SI*1), DI
 	JEQ  candidate3_match_encodeBlockAsm10B
-	MOVL 20(SP), CX
+	MOVL 20(SP), DX
 	JMP  search_loop_encodeBlockAsm10B
 
 candidate3_match_encodeBlockAsm10B:
-	ADDL $0x02, CX
+	ADDL $0x02, DX
 	JMP  candidate_match_encodeBlockAsm10B
 
 candidate2_match_encodeBlockAsm10B:
-	MOVL R8, 24(SP)(R9*4)
-	INCL CX
-	MOVL DI, BX
+	MOVL R9, (AX)(R10*4)
+	INCL DX
+	MOVL R8, SI
 
 candidate_match_encodeBlockAsm10B:
-	MOVL  12(SP), SI
-	TESTL BX, BX
+	MOVL  12(SP), DI
+	TESTL SI, SI
 	JZ    match_extend_back_end_encodeBlockAsm10B
 
 match_extend_back_loop_encodeBlockAsm10B:
-	CMPL CX, SI
+	CMPL DX, DI
 	JBE  match_extend_back_end_encodeBlockAsm10B
-	MOVB -1(DX)(BX*1), DI
-	MOVB -1(DX)(CX*1), R8
-	CMPB DI, R8
+	MOVB -1(BX)(SI*1), R8
+	MOVB -1(BX)(DX*1), R9
+	CMPB R8, R9
 	JNE  match_extend_back_end_encodeBlockAsm10B
-	LEAL -1(CX), CX
-	DECL BX
+	LEAL -1(DX), DX
+	DECL SI
 	JZ   match_extend_back_end_encodeBlockAsm10B
 	JMP  match_extend_back_loop_encodeBlockAsm10B
 
 match_extend_back_end_encodeBlockAsm10B:
-	MOVL CX, SI
-	SUBL 12(SP), SI
-	LEAQ 3(AX)(SI*1), SI
-	CMPQ SI, (SP)
+	MOVL DX, DI
+	SUBL 12(SP), DI
+	LEAQ 3(CX)(DI*1), DI
+	CMPQ DI, (SP)
 	JB   match_dst_size_check_encodeBlockAsm10B
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 match_dst_size_check_encodeBlockAsm10B:
-	MOVL CX, SI
-	MOVL 12(SP), DI
-	CMPL DI, SI
+	MOVL DX, DI
+	MOVL 12(SP), R8
+	CMPL R8, DI
 	JEQ  emit_literal_done_match_emit_encodeBlockAsm10B
-	MOVL SI, R8
-	MOVL SI, 12(SP)
-	LEAQ (DX)(DI*1), SI
-	SUBL DI, R8
-	LEAL -1(R8), DI
-	CMPL DI, $0x3c
+	MOVL DI, R9
+	MOVL DI, 12(SP)
+	LEAQ (BX)(R8*1), DI
+	SUBL R8, R9
+	LEAL -1(R9), R8
+	CMPL R8, $0x3c
 	JB   one_byte_match_emit_encodeBlockAsm10B
-	CMPL DI, $0x00000100
+	CMPL R8, $0x00000100
 	JB   two_bytes_match_emit_encodeBlockAsm10B
 	JB   three_bytes_match_emit_encodeBlockAsm10B
 
 three_bytes_match_emit_encodeBlockAsm10B:
-	MOVB $0xf4, (AX)
-	MOVW DI, 1(AX)
-	ADDQ $0x03, AX
+	MOVB $0xf4, (CX)
+	MOVW R8, 1(CX)
+	ADDQ $0x03, CX
 	JMP  memmove_long_match_emit_encodeBlockAsm10B
 
 two_bytes_match_emit_encodeBlockAsm10B:
-	MOVB $0xf0, (AX)
-	MOVB DI, 1(AX)
-	ADDQ $0x02, AX
-	CMPL DI, $0x40
+	MOVB $0xf0, (CX)
+	MOVB R8, 1(CX)
+	ADDQ $0x02, CX
+	CMPL R8, $0x40
 	JB   memmove_match_emit_encodeBlockAsm10B
 	JMP  memmove_long_match_emit_encodeBlockAsm10B
 
 one_byte_match_emit_encodeBlockAsm10B:
-	SHLB $0x02, DI
-	MOVB DI, (AX)
-	ADDQ $0x01, AX
+	SHLB $0x02, R8
+	MOVB R8, (CX)
+	ADDQ $0x01, CX
 
 memmove_match_emit_encodeBlockAsm10B:
-	LEAQ (AX)(R8*1), DI
+	LEAQ (CX)(R9*1), R8
 
 	// genMemMoveShort
-	CMPQ R8, $0x08
+	CMPQ R9, $0x08
 	JBE  emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8
-	CMPQ R8, $0x10
+	CMPQ R9, $0x10
 	JBE  emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8through16
-	CMPQ R8, $0x20
+	CMPQ R9, $0x20
 	JBE  emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_17through32
 	JMP  emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_33through64
 
 emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8:
-	MOVQ (SI), R9
-	MOVQ R9, (AX)
+	MOVQ (DI), R10
+	MOVQ R10, (CX)
 	JMP  memmove_end_copy_match_emit_encodeBlockAsm10B
 
 emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_8through16:
-	MOVQ (SI), R9
-	MOVQ -8(SI)(R8*1), SI
-	MOVQ R9, (AX)
-	MOVQ SI, -8(AX)(R8*1)
+	MOVQ (DI), R10
+	MOVQ -8(DI)(R9*1), DI
+	MOVQ R10, (CX)
+	MOVQ DI, -8(CX)(R9*1)
 	JMP  memmove_end_copy_match_emit_encodeBlockAsm10B
 
 emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_17through32:
-	MOVOU (SI), X0
-	MOVOU -16(SI)(R8*1), X1
-	MOVOU X0, (AX)
-	MOVOU X1, -16(AX)(R8*1)
+	MOVOU (DI), X0
+	MOVOU -16(DI)(R9*1), X1
+	MOVOU X0, (CX)
+	MOVOU X1, -16(CX)(R9*1)
 	JMP   memmove_end_copy_match_emit_encodeBlockAsm10B
 
 emit_lit_memmove_match_emit_encodeBlockAsm10B_memmove_move_33through64:
-	MOVOU (SI), X0
-	MOVOU 16(SI), X1
-	MOVOU -32(SI)(R8*1), X2
-	MOVOU -16(SI)(R8*1), X3
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(R8*1)
-	MOVOU X3, -16(AX)(R8*1)
+	MOVOU (DI), X0
+	MOVOU 16(DI), X1
+	MOVOU -32(DI)(R9*1), X2
+	MOVOU -16(DI)(R9*1), X3
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R9*1)
+	MOVOU X3, -16(CX)(R9*1)
 
 memmove_end_copy_match_emit_encodeBlockAsm10B:
-	MOVQ DI, AX
+	MOVQ R8, CX
 	JMP  emit_literal_done_match_emit_encodeBlockAsm10B
 
 memmove_long_match_emit_encodeBlockAsm10B:
-	LEAQ (AX)(R8*1), DI
+	LEAQ (CX)(R9*1), R8
 
 	// genMemMoveLong
-	MOVOU (SI), X0
-	MOVOU 16(SI), X1
-	MOVOU -32(SI)(R8*1), X2
-	MOVOU -16(SI)(R8*1), X3
-	MOVQ  R8, R10
-	SHRQ  $0x05, R10
-	MOVQ  AX, R9
-	ANDL  $0x0000001f, R9
-	MOVQ  $0x00000040, R11
-	SUBQ  R9, R11
-	DECQ  R10
+	MOVOU (DI), X0
+	MOVOU 16(DI), X1
+	MOVOU -32(DI)(R9*1), X2
+	MOVOU -16(DI)(R9*1), X3
+	MOVQ  R9, R11
+	SHRQ  $0x05, R11
+	MOVQ  CX, R10
+	ANDL  $0x0000001f, R10
+	MOVQ  $0x00000040, R12
+	SUBQ  R10, R12
+	DECQ  R11
 	JA    emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32
-	LEAQ  -32(SI)(R11*1), R9
-	LEAQ  -32(AX)(R11*1), R12
+	LEAQ  -32(DI)(R12*1), R10
+	LEAQ  -32(CX)(R12*1), R13
 
 emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_big_loop_back:
-	MOVOU (R9), X4
-	MOVOU 16(R9), X5
-	MOVOA X4, (R12)
-	MOVOA X5, 16(R12)
+	MOVOU (R10), X4
+	MOVOU 16(R10), X5
+	MOVOA X4, (R13)
+	MOVOA X5, 16(R13)
+	ADDQ  $0x20, R13
+	ADDQ  $0x20, R10
 	ADDQ  $0x20, R12
-	ADDQ  $0x20, R9
-	ADDQ  $0x20, R11
-	DECQ  R10
+	DECQ  R11
 	JNA   emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_big_loop_back
 
 emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32:
-	MOVOU -32(SI)(R11*1), X4
-	MOVOU -16(SI)(R11*1), X5
-	MOVOA X4, -32(AX)(R11*1)
-	MOVOA X5, -16(AX)(R11*1)
-	ADDQ  $0x20, R11
-	CMPQ  R8, R11
+	MOVOU -32(DI)(R12*1), X4
+	MOVOU -16(DI)(R12*1), X5
+	MOVOA X4, -32(CX)(R12*1)
+	MOVOA X5, -16(CX)(R12*1)
+	ADDQ  $0x20, R12
+	CMPQ  R9, R12
 	JAE   emit_lit_memmove_long_match_emit_encodeBlockAsm10Blarge_forward_sse_loop_32
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(R8*1)
-	MOVOU X3, -16(AX)(R8*1)
-	MOVQ  DI, AX
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R9*1)
+	MOVOU X3, -16(CX)(R9*1)
+	MOVQ  R8, CX
 
 emit_literal_done_match_emit_encodeBlockAsm10B:
 match_nolit_loop_encodeBlockAsm10B:
-	MOVL CX, SI
-	SUBL BX, SI
-	MOVL SI, 16(SP)
-	ADDL $0x04, CX
-	ADDL $0x04, BX
-	MOVQ src_len+32(FP), SI
-	SUBL CX, SI
-	LEAQ (DX)(CX*1), DI
-	LEAQ (DX)(BX*1), BX
+	MOVL DX, DI
+	SUBL SI, DI
+	MOVL DI, 16(SP)
+	ADDL $0x04, DX
+	ADDL $0x04, SI
+	MOVQ src_len+32(FP), DI
+	SUBL DX, DI
+	LEAQ (BX)(DX*1), R8
+	LEAQ (BX)(SI*1), SI
 
 	// matchLen
-	XORL R9, R9
+	XORL R10, R10
 
 matchlen_loopback_16_match_nolit_encodeBlockAsm10B:
-	CMPL SI, $0x10
+	CMPL DI, $0x10
 	JB   matchlen_match8_match_nolit_encodeBlockAsm10B
-	MOVQ (DI)(R9*1), R8
-	MOVQ 8(DI)(R9*1), R10
-	XORQ (BX)(R9*1), R8
+	MOVQ (R8)(R10*1), R9
+	MOVQ 8(R8)(R10*1), R11
+	XORQ (SI)(R10*1), R9
 	JNZ  matchlen_bsf_8_match_nolit_encodeBlockAsm10B
-	XORQ 8(BX)(R9*1), R10
+	XORQ 8(SI)(R10*1), R11
 	JNZ  matchlen_bsf_16match_nolit_encodeBlockAsm10B
-	LEAL -16(SI), SI
-	LEAL 16(R9), R9
+	LEAL -16(DI), DI
+	LEAL 16(R10), R10
 	JMP  matchlen_loopback_16_match_nolit_encodeBlockAsm10B
 
 matchlen_bsf_16match_nolit_encodeBlockAsm10B:
 #ifdef GOAMD64_v3
-	TZCNTQ R10, R10
+	TZCNTQ R11, R11
 
 #else
-	BSFQ R10, R10
+	BSFQ R11, R11
 
 #endif
-	SARQ $0x03, R10
-	LEAL 8(R9)(R10*1), R9
+	SARQ $0x03, R11
+	LEAL 8(R10)(R11*1), R10
 	JMP  match_nolit_end_encodeBlockAsm10B
 
 matchlen_match8_match_nolit_encodeBlockAsm10B:
-	CMPL SI, $0x08
+	CMPL DI, $0x08
 	JB   matchlen_match4_match_nolit_encodeBlockAsm10B
-	MOVQ (DI)(R9*1), R8
-	XORQ (BX)(R9*1), R8
+	MOVQ (R8)(R10*1), R9
+	XORQ (SI)(R10*1), R9
 	JNZ  matchlen_bsf_8_match_nolit_encodeBlockAsm10B
-	LEAL -8(SI), SI
-	LEAL 8(R9), R9
+	LEAL -8(DI), DI
+	LEAL 8(R10), R10
 	JMP  matchlen_match4_match_nolit_encodeBlockAsm10B
 
 matchlen_bsf_8_match_nolit_encodeBlockAsm10B:
 #ifdef GOAMD64_v3
-	TZCNTQ R8, R8
+	TZCNTQ R9, R9
 
 #else
-	BSFQ R8, R8
+	BSFQ R9, R9
 
 #endif
-	SARQ $0x03, R8
-	LEAL (R9)(R8*1), R9
+	SARQ $0x03, R9
+	LEAL (R10)(R9*1), R10
 	JMP  match_nolit_end_encodeBlockAsm10B
 
 matchlen_match4_match_nolit_encodeBlockAsm10B:
-	CMPL SI, $0x04
+	CMPL DI, $0x04
 	JB   matchlen_match2_match_nolit_encodeBlockAsm10B
-	MOVL (DI)(R9*1), R8
-	CMPL (BX)(R9*1), R8
+	MOVL (R8)(R10*1), R9
+	CMPL (SI)(R10*1), R9
 	JNE  matchlen_match2_match_nolit_encodeBlockAsm10B
-	LEAL -4(SI), SI
-	LEAL 4(R9), R9
+	LEAL -4(DI), DI
+	LEAL 4(R10), R10
 
 matchlen_match2_match_nolit_encodeBlockAsm10B:
-	CMPL SI, $0x01
+	CMPL DI, $0x01
 	JE   matchlen_match1_match_nolit_encodeBlockAsm10B
 	JB   match_nolit_end_encodeBlockAsm10B
-	MOVW (DI)(R9*1), R8
-	CMPW (BX)(R9*1), R8
+	MOVW (R8)(R10*1), R9
+	CMPW (SI)(R10*1), R9
 	JNE  matchlen_match1_match_nolit_encodeBlockAsm10B
-	LEAL 2(R9), R9
-	SUBL $0x02, SI
+	LEAL 2(R10), R10
+	SUBL $0x02, DI
 	JZ   match_nolit_end_encodeBlockAsm10B
 
 matchlen_match1_match_nolit_encodeBlockAsm10B:
-	MOVB (DI)(R9*1), R8
-	CMPB (BX)(R9*1), R8
+	MOVB (R8)(R10*1), R9
+	CMPB (SI)(R10*1), R9
 	JNE  match_nolit_end_encodeBlockAsm10B
-	LEAL 1(R9), R9
+	LEAL 1(R10), R10
 
 match_nolit_end_encodeBlockAsm10B:
-	ADDL R9, CX
-	MOVL 16(SP), BX
-	ADDL $0x04, R9
-	MOVL CX, 12(SP)
+	ADDL R10, DX
+	MOVL 16(SP), SI
+	ADDL $0x04, R10
+	MOVL DX, 12(SP)
 
 	// emitCopy
-	CMPL R9, $0x40
+	CMPL R10, $0x40
 	JBE  two_byte_offset_short_match_nolit_encodeBlockAsm10B
-	CMPL BX, $0x00000800
+	CMPL SI, $0x00000800
 	JAE  long_offset_short_match_nolit_encodeBlockAsm10B
-	MOVL $0x00000001, SI
-	LEAL 16(SI), SI
-	MOVB BL, 1(AX)
-	SHRL $0x08, BX
-	SHLL $0x05, BX
-	ORL  BX, SI
-	MOVB SI, (AX)
-	ADDQ $0x02, AX
-	SUBL $0x08, R9
+	MOVL $0x00000001, DI
+	LEAL 16(DI), DI
+	MOVB SI, 1(CX)
+	SHRL $0x08, SI
+	SHLL $0x05, SI
+	ORL  SI, DI
+	MOVB DI, (CX)
+	ADDQ $0x02, CX
+	SUBL $0x08, R10
 
 	// emitRepeat
-	LEAL -4(R9), R9
+	LEAL -4(R10), R10
 	JMP  cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b
-	MOVL R9, SI
-	LEAL -4(R9), R9
-	CMPL SI, $0x08
+	MOVL R10, DI
+	LEAL -4(R10), R10
+	CMPL DI, $0x08
 	JBE  repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short_2b
-	CMPL SI, $0x0c
+	CMPL DI, $0x0c
 	JAE  cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b
-	CMPL BX, $0x00000800
+	CMPL SI, $0x00000800
 	JB   repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b
 
 cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b:
-	CMPL R9, $0x00000104
+	CMPL R10, $0x00000104
 	JB   repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short_2b
-	LEAL -256(R9), R9
-	MOVW $0x0019, (AX)
-	MOVW R9, 2(AX)
-	ADDQ $0x04, AX
+	LEAL -256(R10), R10
+	MOVW $0x0019, (CX)
+	MOVW R10, 2(CX)
+	ADDQ $0x04, CX
 	JMP  match_nolit_emitcopy_end_encodeBlockAsm10B
 
 repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short_2b:
-	LEAL -4(R9), R9
-	MOVW $0x0015, (AX)
-	MOVB R9, 2(AX)
-	ADDQ $0x03, AX
+	LEAL -4(R10), R10
+	MOVW $0x0015, (CX)
+	MOVB R10, 2(CX)
+	ADDQ $0x03, CX
 	JMP  match_nolit_emitcopy_end_encodeBlockAsm10B
 
 repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short_2b:
-	SHLL $0x02, R9
-	ORL  $0x01, R9
-	MOVW R9, (AX)
-	ADDQ $0x02, AX
+	SHLL $0x02, R10
+	ORL  $0x01, R10
+	MOVW R10, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeBlockAsm10B
 
 repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short_2b:
-	XORQ SI, SI
-	LEAL 1(SI)(R9*4), R9
-	MOVB BL, 1(AX)
-	SARL $0x08, BX
-	SHLL $0x05, BX
-	ORL  BX, R9
-	MOVB R9, (AX)
-	ADDQ $0x02, AX
+	XORQ DI, DI
+	LEAL 1(DI)(R10*4), R10
+	MOVB SI, 1(CX)
+	SARL $0x08, SI
+	SHLL $0x05, SI
+	ORL  SI, R10
+	MOVB R10, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeBlockAsm10B
 
 long_offset_short_match_nolit_encodeBlockAsm10B:
-	MOVB $0xee, (AX)
-	MOVW BX, 1(AX)
-	LEAL -60(R9), R9
-	ADDQ $0x03, AX
+	MOVB $0xee, (CX)
+	MOVW SI, 1(CX)
+	LEAL -60(R10), R10
+	ADDQ $0x03, CX
 
 	// emitRepeat
-	MOVL R9, SI
-	LEAL -4(R9), R9
-	CMPL SI, $0x08
+	MOVL R10, DI
+	LEAL -4(R10), R10
+	CMPL DI, $0x08
 	JBE  repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short
-	CMPL SI, $0x0c
+	CMPL DI, $0x0c
 	JAE  cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short
-	CMPL BX, $0x00000800
+	CMPL SI, $0x00000800
 	JB   repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short
 
 cant_repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short:
-	CMPL R9, $0x00000104
+	CMPL R10, $0x00000104
 	JB   repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short
-	LEAL -256(R9), R9
-	MOVW $0x0019, (AX)
-	MOVW R9, 2(AX)
-	ADDQ $0x04, AX
+	LEAL -256(R10), R10
+	MOVW $0x0019, (CX)
+	MOVW R10, 2(CX)
+	ADDQ $0x04, CX
 	JMP  match_nolit_emitcopy_end_encodeBlockAsm10B
 
 repeat_three_match_nolit_encodeBlockAsm10B_emit_copy_short:
-	LEAL -4(R9), R9
-	MOVW $0x0015, (AX)
-	MOVB R9, 2(AX)
-	ADDQ $0x03, AX
+	LEAL -4(R10), R10
+	MOVW $0x0015, (CX)
+	MOVB R10, 2(CX)
+	ADDQ $0x03, CX
 	JMP  match_nolit_emitcopy_end_encodeBlockAsm10B
 
 repeat_two_match_nolit_encodeBlockAsm10B_emit_copy_short:
-	SHLL $0x02, R9
-	ORL  $0x01, R9
-	MOVW R9, (AX)
-	ADDQ $0x02, AX
+	SHLL $0x02, R10
+	ORL  $0x01, R10
+	MOVW R10, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeBlockAsm10B
 
 repeat_two_offset_match_nolit_encodeBlockAsm10B_emit_copy_short:
-	XORQ SI, SI
-	LEAL 1(SI)(R9*4), R9
-	MOVB BL, 1(AX)
-	SARL $0x08, BX
-	SHLL $0x05, BX
-	ORL  BX, R9
-	MOVB R9, (AX)
-	ADDQ $0x02, AX
+	XORQ DI, DI
+	LEAL 1(DI)(R10*4), R10
+	MOVB SI, 1(CX)
+	SARL $0x08, SI
+	SHLL $0x05, SI
+	ORL  SI, R10
+	MOVB R10, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeBlockAsm10B
 
 two_byte_offset_short_match_nolit_encodeBlockAsm10B:
-	MOVL R9, SI
-	SHLL $0x02, SI
-	CMPL R9, $0x0c
+	MOVL R10, DI
+	SHLL $0x02, DI
+	CMPL R10, $0x0c
 	JAE  emit_copy_three_match_nolit_encodeBlockAsm10B
-	CMPL BX, $0x00000800
+	CMPL SI, $0x00000800
 	JAE  emit_copy_three_match_nolit_encodeBlockAsm10B
-	LEAL -15(SI), SI
-	MOVB BL, 1(AX)
-	SHRL $0x08, BX
-	SHLL $0x05, BX
-	ORL  BX, SI
-	MOVB SI, (AX)
-	ADDQ $0x02, AX
+	LEAL -15(DI), DI
+	MOVB SI, 1(CX)
+	SHRL $0x08, SI
+	SHLL $0x05, SI
+	ORL  SI, DI
+	MOVB DI, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeBlockAsm10B
 
 emit_copy_three_match_nolit_encodeBlockAsm10B:
-	LEAL -2(SI), SI
-	MOVB SI, (AX)
-	MOVW BX, 1(AX)
-	ADDQ $0x03, AX
+	LEAL -2(DI), DI
+	MOVB DI, (CX)
+	MOVW SI, 1(CX)
+	ADDQ $0x03, CX
 
 match_nolit_emitcopy_end_encodeBlockAsm10B:
-	CMPL CX, 8(SP)
+	CMPL DX, 8(SP)
 	JAE  emit_remainder_encodeBlockAsm10B
-	MOVQ -2(DX)(CX*1), SI
-	CMPQ AX, (SP)
+	MOVQ -2(BX)(DX*1), DI
+	CMPQ CX, (SP)
 	JB   match_nolit_dst_ok_encodeBlockAsm10B
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 match_nolit_dst_ok_encodeBlockAsm10B:
-	MOVQ  $0x9e3779b1, R8
-	MOVQ  SI, DI
-	SHRQ  $0x10, SI
-	MOVQ  SI, BX
-	SHLQ  $0x20, DI
-	IMULQ R8, DI
-	SHRQ  $0x36, DI
-	SHLQ  $0x20, BX
-	IMULQ R8, BX
-	SHRQ  $0x36, BX
-	LEAL  -2(CX), R8
-	LEAQ  24(SP)(BX*4), R9
-	MOVL  (R9), BX
-	MOVL  R8, 24(SP)(DI*4)
-	MOVL  CX, (R9)
-	CMPL  (DX)(BX*1), SI
+	MOVQ  $0x9e3779b1, R9
+	MOVQ  DI, R8
+	SHRQ  $0x10, DI
+	MOVQ  DI, SI
+	SHLQ  $0x20, R8
+	IMULQ R9, R8
+	SHRQ  $0x36, R8
+	SHLQ  $0x20, SI
+	IMULQ R9, SI
+	SHRQ  $0x36, SI
+	LEAL  -2(DX), R9
+	LEAQ  (AX)(SI*4), R10
+	MOVL  (R10), SI
+	MOVL  R9, (AX)(R8*4)
+	MOVL  DX, (R10)
+	CMPL  (BX)(SI*1), DI
 	JEQ   match_nolit_loop_encodeBlockAsm10B
-	INCL  CX
+	INCL  DX
 	JMP   search_loop_encodeBlockAsm10B
 
 emit_remainder_encodeBlockAsm10B:
-	MOVQ src_len+32(FP), CX
-	SUBL 12(SP), CX
-	LEAQ 3(AX)(CX*1), CX
-	CMPQ CX, (SP)
+	MOVQ src_len+32(FP), AX
+	SUBL 12(SP), AX
+	LEAQ 3(CX)(AX*1), AX
+	CMPQ AX, (SP)
 	JB   emit_remainder_ok_encodeBlockAsm10B
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 emit_remainder_ok_encodeBlockAsm10B:
-	MOVQ src_len+32(FP), CX
-	MOVL 12(SP), BX
-	CMPL BX, CX
+	MOVQ src_len+32(FP), AX
+	MOVL 12(SP), DX
+	CMPL DX, AX
 	JEQ  emit_literal_done_emit_remainder_encodeBlockAsm10B
-	MOVL CX, SI
-	MOVL CX, 12(SP)
-	LEAQ (DX)(BX*1), CX
-	SUBL BX, SI
+	MOVL AX, SI
+	MOVL AX, 12(SP)
+	LEAQ (BX)(DX*1), AX
+	SUBL DX, SI
 	LEAL -1(SI), DX
 	CMPL DX, $0x3c
 	JB   one_byte_emit_remainder_encodeBlockAsm10B
@@ -4793,26 +4797,26 @@ emit_remainder_ok_encodeBlockAsm10B:
 	JB   three_bytes_emit_remainder_encodeBlockAsm10B
 
 three_bytes_emit_remainder_encodeBlockAsm10B:
-	MOVB $0xf4, (AX)
-	MOVW DX, 1(AX)
-	ADDQ $0x03, AX
+	MOVB $0xf4, (CX)
+	MOVW DX, 1(CX)
+	ADDQ $0x03, CX
 	JMP  memmove_long_emit_remainder_encodeBlockAsm10B
 
 two_bytes_emit_remainder_encodeBlockAsm10B:
-	MOVB $0xf0, (AX)
-	MOVB DL, 1(AX)
-	ADDQ $0x02, AX
+	MOVB $0xf0, (CX)
+	MOVB DL, 1(CX)
+	ADDQ $0x02, CX
 	CMPL DX, $0x40
 	JB   memmove_emit_remainder_encodeBlockAsm10B
 	JMP  memmove_long_emit_remainder_encodeBlockAsm10B
 
 one_byte_emit_remainder_encodeBlockAsm10B:
 	SHLB $0x02, DL
-	MOVB DL, (AX)
-	ADDQ $0x01, AX
+	MOVB DL, (CX)
+	ADDQ $0x01, CX
 
 memmove_emit_remainder_encodeBlockAsm10B:
-	LEAQ (AX)(SI*1), DX
+	LEAQ (CX)(SI*1), DX
 	MOVL SI, BX
 
 	// genMemMoveShort
@@ -4828,73 +4832,73 @@ memmove_emit_remainder_encodeBlockAsm10B:
 	JMP  emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_33through64
 
 emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_1or2:
-	MOVB (CX), SI
-	MOVB -1(CX)(BX*1), CL
-	MOVB SI, (AX)
-	MOVB CL, -1(AX)(BX*1)
+	MOVB (AX), SI
+	MOVB -1(AX)(BX*1), AL
+	MOVB SI, (CX)
+	MOVB AL, -1(CX)(BX*1)
 	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm10B
 
 emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_3:
-	MOVW (CX), SI
-	MOVB 2(CX), CL
-	MOVW SI, (AX)
-	MOVB CL, 2(AX)
+	MOVW (AX), SI
+	MOVB 2(AX), AL
+	MOVW SI, (CX)
+	MOVB AL, 2(CX)
 	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm10B
 
 emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_4through7:
-	MOVL (CX), SI
-	MOVL -4(CX)(BX*1), CX
-	MOVL SI, (AX)
-	MOVL CX, -4(AX)(BX*1)
+	MOVL (AX), SI
+	MOVL -4(AX)(BX*1), AX
+	MOVL SI, (CX)
+	MOVL AX, -4(CX)(BX*1)
 	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm10B
 
 emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_8through16:
-	MOVQ (CX), SI
-	MOVQ -8(CX)(BX*1), CX
-	MOVQ SI, (AX)
-	MOVQ CX, -8(AX)(BX*1)
+	MOVQ (AX), SI
+	MOVQ -8(AX)(BX*1), AX
+	MOVQ SI, (CX)
+	MOVQ AX, -8(CX)(BX*1)
 	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm10B
 
 emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_17through32:
-	MOVOU (CX), X0
-	MOVOU -16(CX)(BX*1), X1
-	MOVOU X0, (AX)
-	MOVOU X1, -16(AX)(BX*1)
+	MOVOU (AX), X0
+	MOVOU -16(AX)(BX*1), X1
+	MOVOU X0, (CX)
+	MOVOU X1, -16(CX)(BX*1)
 	JMP   memmove_end_copy_emit_remainder_encodeBlockAsm10B
 
 emit_lit_memmove_emit_remainder_encodeBlockAsm10B_memmove_move_33through64:
-	MOVOU (CX), X0
-	MOVOU 16(CX), X1
-	MOVOU -32(CX)(BX*1), X2
-	MOVOU -16(CX)(BX*1), X3
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(BX*1)
-	MOVOU X3, -16(AX)(BX*1)
+	MOVOU (AX), X0
+	MOVOU 16(AX), X1
+	MOVOU -32(AX)(BX*1), X2
+	MOVOU -16(AX)(BX*1), X3
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(BX*1)
+	MOVOU X3, -16(CX)(BX*1)
 
 memmove_end_copy_emit_remainder_encodeBlockAsm10B:
-	MOVQ DX, AX
+	MOVQ DX, CX
 	JMP  emit_literal_done_emit_remainder_encodeBlockAsm10B
 
 memmove_long_emit_remainder_encodeBlockAsm10B:
-	LEAQ (AX)(SI*1), DX
+	LEAQ (CX)(SI*1), DX
 	MOVL SI, BX
 
 	// genMemMoveLong
-	MOVOU (CX), X0
-	MOVOU 16(CX), X1
-	MOVOU -32(CX)(BX*1), X2
-	MOVOU -16(CX)(BX*1), X3
+	MOVOU (AX), X0
+	MOVOU 16(AX), X1
+	MOVOU -32(AX)(BX*1), X2
+	MOVOU -16(AX)(BX*1), X3
 	MOVQ  BX, DI
 	SHRQ  $0x05, DI
-	MOVQ  AX, SI
+	MOVQ  CX, SI
 	ANDL  $0x0000001f, SI
 	MOVQ  $0x00000040, R8
 	SUBQ  SI, R8
 	DECQ  DI
 	JA    emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_forward_sse_loop_32
-	LEAQ  -32(CX)(R8*1), SI
-	LEAQ  -32(AX)(R8*1), R9
+	LEAQ  -32(AX)(R8*1), SI
+	LEAQ  -32(CX)(R8*1), R9
 
 emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_big_loop_back:
 	MOVOU (SI), X4
@@ -4908,943 +4912,944 @@ emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_big_loop_back:
 	JNA   emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_big_loop_back
 
 emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_forward_sse_loop_32:
-	MOVOU -32(CX)(R8*1), X4
-	MOVOU -16(CX)(R8*1), X5
-	MOVOA X4, -32(AX)(R8*1)
-	MOVOA X5, -16(AX)(R8*1)
+	MOVOU -32(AX)(R8*1), X4
+	MOVOU -16(AX)(R8*1), X5
+	MOVOA X4, -32(CX)(R8*1)
+	MOVOA X5, -16(CX)(R8*1)
 	ADDQ  $0x20, R8
 	CMPQ  BX, R8
 	JAE   emit_lit_memmove_long_emit_remainder_encodeBlockAsm10Blarge_forward_sse_loop_32
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(BX*1)
-	MOVOU X3, -16(AX)(BX*1)
-	MOVQ  DX, AX
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(BX*1)
+	MOVOU X3, -16(CX)(BX*1)
+	MOVQ  DX, CX
 
 emit_literal_done_emit_remainder_encodeBlockAsm10B:
-	MOVQ dst_base+0(FP), CX
-	SUBQ CX, AX
-	MOVQ AX, ret+48(FP)
+	MOVQ dst_base+0(FP), AX
+	SUBQ AX, CX
+	MOVQ CX, ret+56(FP)
 	RET
 
-// func encodeBlockAsm8B(dst []byte, src []byte) int
+// func encodeBlockAsm8B(dst []byte, src []byte, tmp *[1024]byte) int
 // Requires: BMI, SSE2
-TEXT ·encodeBlockAsm8B(SB), $1048-56
-	MOVQ dst_base+0(FP), AX
-	MOVQ $0x00000008, CX
-	LEAQ 24(SP), DX
+TEXT ·encodeBlockAsm8B(SB), $24-64
+	MOVQ tmp+48(FP), AX
+	MOVQ dst_base+0(FP), CX
+	MOVQ $0x00000008, DX
+	MOVQ AX, BX
 	PXOR X0, X0
 
 zero_loop_encodeBlockAsm8B:
-	MOVOU X0, (DX)
-	MOVOU X0, 16(DX)
-	MOVOU X0, 32(DX)
-	MOVOU X0, 48(DX)
-	MOVOU X0, 64(DX)
-	MOVOU X0, 80(DX)
-	MOVOU X0, 96(DX)
-	MOVOU X0, 112(DX)
-	ADDQ  $0x80, DX
-	DECQ  CX
+	MOVOU X0, (BX)
+	MOVOU X0, 16(BX)
+	MOVOU X0, 32(BX)
+	MOVOU X0, 48(BX)
+	MOVOU X0, 64(BX)
+	MOVOU X0, 80(BX)
+	MOVOU X0, 96(BX)
+	MOVOU X0, 112(BX)
+	ADDQ  $0x80, BX
+	DECQ  DX
 	JNZ   zero_loop_encodeBlockAsm8B
 	MOVL  $0x00000000, 12(SP)
-	MOVQ  src_len+32(FP), CX
-	LEAQ  -9(CX), DX
-	LEAQ  -8(CX), BX
-	MOVL  BX, 8(SP)
-	SHRQ  $0x05, CX
-	SUBL  CX, DX
-	LEAQ  (AX)(DX*1), DX
-	MOVQ  DX, (SP)
-	MOVL  $0x00000001, CX
-	MOVL  CX, 16(SP)
-	MOVQ  src_base+24(FP), DX
+	MOVQ  src_len+32(FP), DX
+	LEAQ  -9(DX), BX
+	LEAQ  -8(DX), SI
+	MOVL  SI, 8(SP)
+	SHRQ  $0x05, DX
+	SUBL  DX, BX
+	LEAQ  (CX)(BX*1), BX
+	MOVQ  BX, (SP)
+	MOVL  $0x00000001, DX
+	MOVL  DX, 16(SP)
+	MOVQ  src_base+24(FP), BX
 
 search_loop_encodeBlockAsm8B:
-	MOVL  CX, BX
-	SUBL  12(SP), BX
-	SHRL  $0x04, BX
-	LEAL  4(CX)(BX*1), BX
-	CMPL  BX, 8(SP)
+	MOVL  DX, SI
+	SUBL  12(SP), SI
+	SHRL  $0x04, SI
+	LEAL  4(DX)(SI*1), SI
+	CMPL  SI, 8(SP)
 	JAE   emit_remainder_encodeBlockAsm8B
-	MOVQ  (DX)(CX*1), SI
-	MOVL  BX, 20(SP)
-	MOVQ  $0x9e3779b1, R8
-	MOVQ  SI, R9
-	MOVQ  SI, R10
-	SHRQ  $0x08, R10
-	SHLQ  $0x20, R9
-	IMULQ R8, R9
-	SHRQ  $0x38, R9
+	MOVQ  (BX)(DX*1), DI
+	MOVL  SI, 20(SP)
+	MOVQ  $0x9e3779b1, R9
+	MOVQ  DI, R10
+	MOVQ  DI, R11
+	SHRQ  $0x08, R11
 	SHLQ  $0x20, R10
-	IMULQ R8, R10
+	IMULQ R9, R10
 	SHRQ  $0x38, R10
-	MOVL  24(SP)(R9*4), BX
-	MOVL  24(SP)(R10*4), DI
-	MOVL  CX, 24(SP)(R9*4)
-	LEAL  1(CX), R9
-	MOVL  R9, 24(SP)(R10*4)
-	MOVQ  SI, R9
-	SHRQ  $0x10, R9
-	SHLQ  $0x20, R9
-	IMULQ R8, R9
-	SHRQ  $0x38, R9
-	MOVL  CX, R8
-	SUBL  16(SP), R8
-	MOVL  1(DX)(R8*1), R10
-	MOVQ  SI, R8
-	SHRQ  $0x08, R8
-	CMPL  R8, R10
-	JNE   no_repeat_found_encodeBlockAsm8B
-	LEAL  1(CX), SI
-	MOVL  12(SP), DI
-	MOVL  SI, BX
-	SUBL  16(SP), BX
+	SHLQ  $0x20, R11
+	IMULQ R9, R11
+	SHRQ  $0x38, R11
+	MOVL  (AX)(R10*4), SI
+	MOVL  (AX)(R11*4), R8
+	MOVL  DX, (AX)(R10*4)
+	LEAL  1(DX), R10
+	MOVL  R10, (AX)(R11*4)
+	MOVQ  DI, R10
+	SHRQ  $0x10, R10
+	SHLQ  $0x20, R10
+	IMULQ R9, R10
+	SHRQ  $0x38, R10
+	MOVL  DX, R9
+	SUBL  16(SP), R9
+	MOVL  1(BX)(R9*1), R11
+	MOVQ  DI, R9
+	SHRQ  $0x08, R9
+	CMPL  R9, R11
+	JNE   no_repeat_found_encodeBlockAsm8B
+	LEAL  1(DX), DI
+	MOVL  12(SP), R8
+	MOVL  DI, SI
+	SUBL  16(SP), SI
 	JZ    repeat_extend_back_end_encodeBlockAsm8B
 
 repeat_extend_back_loop_encodeBlockAsm8B:
-	CMPL SI, DI
+	CMPL DI, R8
 	JBE  repeat_extend_back_end_encodeBlockAsm8B
-	MOVB -1(DX)(BX*1), R8
-	MOVB -1(DX)(SI*1), R9
-	CMPB R8, R9
+	MOVB -1(BX)(SI*1), R9
+	MOVB -1(BX)(DI*1), R10
+	CMPB R9, R10
 	JNE  repeat_extend_back_end_encodeBlockAsm8B
-	LEAL -1(SI), SI
-	DECL BX
+	LEAL -1(DI), DI
+	DECL SI
 	JNZ  repeat_extend_back_loop_encodeBlockAsm8B
 
 repeat_extend_back_end_encodeBlockAsm8B:
-	MOVL SI, BX
-	SUBL 12(SP), BX
-	LEAQ 3(AX)(BX*1), BX
-	CMPQ BX, (SP)
+	MOVL DI, SI
+	SUBL 12(SP), SI
+	LEAQ 3(CX)(SI*1), SI
+	CMPQ SI, (SP)
 	JB   repeat_dst_size_check_encodeBlockAsm8B
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 repeat_dst_size_check_encodeBlockAsm8B:
-	MOVL 12(SP), BX
-	CMPL BX, SI
+	MOVL 12(SP), SI
+	CMPL SI, DI
 	JEQ  emit_literal_done_repeat_emit_encodeBlockAsm8B
-	MOVL SI, R8
-	MOVL SI, 12(SP)
-	LEAQ (DX)(BX*1), R9
-	SUBL BX, R8
-	LEAL -1(R8), BX
-	CMPL BX, $0x3c
+	MOVL DI, R9
+	MOVL DI, 12(SP)
+	LEAQ (BX)(SI*1), R10
+	SUBL SI, R9
+	LEAL -1(R9), SI
+	CMPL SI, $0x3c
 	JB   one_byte_repeat_emit_encodeBlockAsm8B
-	CMPL BX, $0x00000100
+	CMPL SI, $0x00000100
 	JB   two_bytes_repeat_emit_encodeBlockAsm8B
 	JB   three_bytes_repeat_emit_encodeBlockAsm8B
 
 three_bytes_repeat_emit_encodeBlockAsm8B:
-	MOVB $0xf4, (AX)
-	MOVW BX, 1(AX)
-	ADDQ $0x03, AX
+	MOVB $0xf4, (CX)
+	MOVW SI, 1(CX)
+	ADDQ $0x03, CX
 	JMP  memmove_long_repeat_emit_encodeBlockAsm8B
 
 two_bytes_repeat_emit_encodeBlockAsm8B:
-	MOVB $0xf0, (AX)
-	MOVB BL, 1(AX)
-	ADDQ $0x02, AX
-	CMPL BX, $0x40
+	MOVB $0xf0, (CX)
+	MOVB SI, 1(CX)
+	ADDQ $0x02, CX
+	CMPL SI, $0x40
 	JB   memmove_repeat_emit_encodeBlockAsm8B
 	JMP  memmove_long_repeat_emit_encodeBlockAsm8B
 
 one_byte_repeat_emit_encodeBlockAsm8B:
-	SHLB $0x02, BL
-	MOVB BL, (AX)
-	ADDQ $0x01, AX
+	SHLB $0x02, SI
+	MOVB SI, (CX)
+	ADDQ $0x01, CX
 
 memmove_repeat_emit_encodeBlockAsm8B:
-	LEAQ (AX)(R8*1), BX
+	LEAQ (CX)(R9*1), SI
 
 	// genMemMoveShort
-	CMPQ R8, $0x08
+	CMPQ R9, $0x08
 	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8
-	CMPQ R8, $0x10
+	CMPQ R9, $0x10
 	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8through16
-	CMPQ R8, $0x20
+	CMPQ R9, $0x20
 	JBE  emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_17through32
 	JMP  emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_33through64
 
 emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8:
-	MOVQ (R9), R10
-	MOVQ R10, (AX)
+	MOVQ (R10), R11
+	MOVQ R11, (CX)
 	JMP  memmove_end_copy_repeat_emit_encodeBlockAsm8B
 
 emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_8through16:
-	MOVQ (R9), R10
-	MOVQ -8(R9)(R8*1), R9
-	MOVQ R10, (AX)
-	MOVQ R9, -8(AX)(R8*1)
+	MOVQ (R10), R11
+	MOVQ -8(R10)(R9*1), R10
+	MOVQ R11, (CX)
+	MOVQ R10, -8(CX)(R9*1)
 	JMP  memmove_end_copy_repeat_emit_encodeBlockAsm8B
 
 emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_17through32:
-	MOVOU (R9), X0
-	MOVOU -16(R9)(R8*1), X1
-	MOVOU X0, (AX)
-	MOVOU X1, -16(AX)(R8*1)
+	MOVOU (R10), X0
+	MOVOU -16(R10)(R9*1), X1
+	MOVOU X0, (CX)
+	MOVOU X1, -16(CX)(R9*1)
 	JMP   memmove_end_copy_repeat_emit_encodeBlockAsm8B
 
 emit_lit_memmove_repeat_emit_encodeBlockAsm8B_memmove_move_33through64:
-	MOVOU (R9), X0
-	MOVOU 16(R9), X1
-	MOVOU -32(R9)(R8*1), X2
-	MOVOU -16(R9)(R8*1), X3
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(R8*1)
-	MOVOU X3, -16(AX)(R8*1)
+	MOVOU (R10), X0
+	MOVOU 16(R10), X1
+	MOVOU -32(R10)(R9*1), X2
+	MOVOU -16(R10)(R9*1), X3
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R9*1)
+	MOVOU X3, -16(CX)(R9*1)
 
 memmove_end_copy_repeat_emit_encodeBlockAsm8B:
-	MOVQ BX, AX
+	MOVQ SI, CX
 	JMP  emit_literal_done_repeat_emit_encodeBlockAsm8B
 
 memmove_long_repeat_emit_encodeBlockAsm8B:
-	LEAQ (AX)(R8*1), BX
+	LEAQ (CX)(R9*1), SI
 
 	// genMemMoveLong
-	MOVOU (R9), X0
-	MOVOU 16(R9), X1
-	MOVOU -32(R9)(R8*1), X2
-	MOVOU -16(R9)(R8*1), X3
-	MOVQ  R8, R11
-	SHRQ  $0x05, R11
-	MOVQ  AX, R10
-	ANDL  $0x0000001f, R10
-	MOVQ  $0x00000040, R12
-	SUBQ  R10, R12
-	DECQ  R11
+	MOVOU (R10), X0
+	MOVOU 16(R10), X1
+	MOVOU -32(R10)(R9*1), X2
+	MOVOU -16(R10)(R9*1), X3
+	MOVQ  R9, R12
+	SHRQ  $0x05, R12
+	MOVQ  CX, R11
+	ANDL  $0x0000001f, R11
+	MOVQ  $0x00000040, R13
+	SUBQ  R11, R13
+	DECQ  R12
 	JA    emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32
-	LEAQ  -32(R9)(R12*1), R10
-	LEAQ  -32(AX)(R12*1), R13
+	LEAQ  -32(R10)(R13*1), R11
+	LEAQ  -32(CX)(R13*1), R14
 
 emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_big_loop_back:
-	MOVOU (R10), X4
-	MOVOU 16(R10), X5
-	MOVOA X4, (R13)
-	MOVOA X5, 16(R13)
+	MOVOU (R11), X4
+	MOVOU 16(R11), X5
+	MOVOA X4, (R14)
+	MOVOA X5, 16(R14)
+	ADDQ  $0x20, R14
+	ADDQ  $0x20, R11
 	ADDQ  $0x20, R13
-	ADDQ  $0x20, R10
-	ADDQ  $0x20, R12
-	DECQ  R11
+	DECQ  R12
 	JNA   emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_big_loop_back
 
 emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32:
-	MOVOU -32(R9)(R12*1), X4
-	MOVOU -16(R9)(R12*1), X5
-	MOVOA X4, -32(AX)(R12*1)
-	MOVOA X5, -16(AX)(R12*1)
-	ADDQ  $0x20, R12
-	CMPQ  R8, R12
+	MOVOU -32(R10)(R13*1), X4
+	MOVOU -16(R10)(R13*1), X5
+	MOVOA X4, -32(CX)(R13*1)
+	MOVOA X5, -16(CX)(R13*1)
+	ADDQ  $0x20, R13
+	CMPQ  R9, R13
 	JAE   emit_lit_memmove_long_repeat_emit_encodeBlockAsm8Blarge_forward_sse_loop_32
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(R8*1)
-	MOVOU X3, -16(AX)(R8*1)
-	MOVQ  BX, AX
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R9*1)
+	MOVOU X3, -16(CX)(R9*1)
+	MOVQ  SI, CX
 
 emit_literal_done_repeat_emit_encodeBlockAsm8B:
-	ADDL $0x05, CX
-	MOVL CX, BX
-	SUBL 16(SP), BX
-	MOVQ src_len+32(FP), R8
-	SUBL CX, R8
-	LEAQ (DX)(CX*1), R9
-	LEAQ (DX)(BX*1), BX
+	ADDL $0x05, DX
+	MOVL DX, SI
+	SUBL 16(SP), SI
+	MOVQ src_len+32(FP), R9
+	SUBL DX, R9
+	LEAQ (BX)(DX*1), R10
+	LEAQ (BX)(SI*1), SI
 
 	// matchLen
-	XORL R11, R11
+	XORL R12, R12
 
 matchlen_loopback_16_repeat_extend_encodeBlockAsm8B:
-	CMPL R8, $0x10
+	CMPL R9, $0x10
 	JB   matchlen_match8_repeat_extend_encodeBlockAsm8B
-	MOVQ (R9)(R11*1), R10
-	MOVQ 8(R9)(R11*1), R12
-	XORQ (BX)(R11*1), R10
+	MOVQ (R10)(R12*1), R11
+	MOVQ 8(R10)(R12*1), R13
+	XORQ (SI)(R12*1), R11
 	JNZ  matchlen_bsf_8_repeat_extend_encodeBlockAsm8B
-	XORQ 8(BX)(R11*1), R12
+	XORQ 8(SI)(R12*1), R13
 	JNZ  matchlen_bsf_16repeat_extend_encodeBlockAsm8B
-	LEAL -16(R8), R8
-	LEAL 16(R11), R11
+	LEAL -16(R9), R9
+	LEAL 16(R12), R12
 	JMP  matchlen_loopback_16_repeat_extend_encodeBlockAsm8B
 
 matchlen_bsf_16repeat_extend_encodeBlockAsm8B:
 #ifdef GOAMD64_v3
-	TZCNTQ R12, R12
+	TZCNTQ R13, R13
 
 #else
-	BSFQ R12, R12
+	BSFQ R13, R13
 
 #endif
-	SARQ $0x03, R12
-	LEAL 8(R11)(R12*1), R11
+	SARQ $0x03, R13
+	LEAL 8(R12)(R13*1), R12
 	JMP  repeat_extend_forward_end_encodeBlockAsm8B
 
 matchlen_match8_repeat_extend_encodeBlockAsm8B:
-	CMPL R8, $0x08
+	CMPL R9, $0x08
 	JB   matchlen_match4_repeat_extend_encodeBlockAsm8B
-	MOVQ (R9)(R11*1), R10
-	XORQ (BX)(R11*1), R10
+	MOVQ (R10)(R12*1), R11
+	XORQ (SI)(R12*1), R11
 	JNZ  matchlen_bsf_8_repeat_extend_encodeBlockAsm8B
-	LEAL -8(R8), R8
-	LEAL 8(R11), R11
+	LEAL -8(R9), R9
+	LEAL 8(R12), R12
 	JMP  matchlen_match4_repeat_extend_encodeBlockAsm8B
 
 matchlen_bsf_8_repeat_extend_encodeBlockAsm8B:
 #ifdef GOAMD64_v3
-	TZCNTQ R10, R10
+	TZCNTQ R11, R11
 
 #else
-	BSFQ R10, R10
+	BSFQ R11, R11
 
 #endif
-	SARQ $0x03, R10
-	LEAL (R11)(R10*1), R11
+	SARQ $0x03, R11
+	LEAL (R12)(R11*1), R12
 	JMP  repeat_extend_forward_end_encodeBlockAsm8B
 
 matchlen_match4_repeat_extend_encodeBlockAsm8B:
-	CMPL R8, $0x04
+	CMPL R9, $0x04
 	JB   matchlen_match2_repeat_extend_encodeBlockAsm8B
-	MOVL (R9)(R11*1), R10
-	CMPL (BX)(R11*1), R10
+	MOVL (R10)(R12*1), R11
+	CMPL (SI)(R12*1), R11
 	JNE  matchlen_match2_repeat_extend_encodeBlockAsm8B
-	LEAL -4(R8), R8
-	LEAL 4(R11), R11
+	LEAL -4(R9), R9
+	LEAL 4(R12), R12
 
 matchlen_match2_repeat_extend_encodeBlockAsm8B:
-	CMPL R8, $0x01
+	CMPL R9, $0x01
 	JE   matchlen_match1_repeat_extend_encodeBlockAsm8B
 	JB   repeat_extend_forward_end_encodeBlockAsm8B
-	MOVW (R9)(R11*1), R10
-	CMPW (BX)(R11*1), R10
+	MOVW (R10)(R12*1), R11
+	CMPW (SI)(R12*1), R11
 	JNE  matchlen_match1_repeat_extend_encodeBlockAsm8B
-	LEAL 2(R11), R11
-	SUBL $0x02, R8
+	LEAL 2(R12), R12
+	SUBL $0x02, R9
 	JZ   repeat_extend_forward_end_encodeBlockAsm8B
 
 matchlen_match1_repeat_extend_encodeBlockAsm8B:
-	MOVB (R9)(R11*1), R10
-	CMPB (BX)(R11*1), R10
+	MOVB (R10)(R12*1), R11
+	CMPB (SI)(R12*1), R11
 	JNE  repeat_extend_forward_end_encodeBlockAsm8B
-	LEAL 1(R11), R11
+	LEAL 1(R12), R12
 
 repeat_extend_forward_end_encodeBlockAsm8B:
-	ADDL  R11, CX
-	MOVL  CX, BX
-	SUBL  SI, BX
-	MOVL  16(SP), SI
-	TESTL DI, DI
+	ADDL  R12, DX
+	MOVL  DX, SI
+	SUBL  DI, SI
+	MOVL  16(SP), DI
+	TESTL R8, R8
 	JZ    repeat_as_copy_encodeBlockAsm8B
 
 	// emitRepeat
-	MOVL BX, SI
-	LEAL -4(BX), BX
-	CMPL SI, $0x08
+	MOVL SI, DI
+	LEAL -4(SI), SI
+	CMPL DI, $0x08
 	JBE  repeat_two_match_repeat_encodeBlockAsm8B
-	CMPL SI, $0x0c
+	CMPL DI, $0x0c
 	JAE  cant_repeat_two_offset_match_repeat_encodeBlockAsm8B
 
 cant_repeat_two_offset_match_repeat_encodeBlockAsm8B:
-	CMPL BX, $0x00000104
+	CMPL SI, $0x00000104
 	JB   repeat_three_match_repeat_encodeBlockAsm8B
-	LEAL -256(BX), BX
-	MOVW $0x0019, (AX)
-	MOVW BX, 2(AX)
-	ADDQ $0x04, AX
+	LEAL -256(SI), SI
+	MOVW $0x0019, (CX)
+	MOVW SI, 2(CX)
+	ADDQ $0x04, CX
 	JMP  repeat_end_emit_encodeBlockAsm8B
 
 repeat_three_match_repeat_encodeBlockAsm8B:
-	LEAL -4(BX), BX
-	MOVW $0x0015, (AX)
-	MOVB BL, 2(AX)
-	ADDQ $0x03, AX
+	LEAL -4(SI), SI
+	MOVW $0x0015, (CX)
+	MOVB SI, 2(CX)
+	ADDQ $0x03, CX
 	JMP  repeat_end_emit_encodeBlockAsm8B
 
 repeat_two_match_repeat_encodeBlockAsm8B:
-	SHLL $0x02, BX
-	ORL  $0x01, BX
-	MOVW BX, (AX)
-	ADDQ $0x02, AX
+	SHLL $0x02, SI
+	ORL  $0x01, SI
+	MOVW SI, (CX)
+	ADDQ $0x02, CX
 	JMP  repeat_end_emit_encodeBlockAsm8B
-	XORQ DI, DI
-	LEAL 1(DI)(BX*4), BX
-	MOVB SI, 1(AX)
-	SARL $0x08, SI
-	SHLL $0x05, SI
-	ORL  SI, BX
-	MOVB BL, (AX)
-	ADDQ $0x02, AX
+	XORQ R8, R8
+	LEAL 1(R8)(SI*4), SI
+	MOVB DI, 1(CX)
+	SARL $0x08, DI
+	SHLL $0x05, DI
+	ORL  DI, SI
+	MOVB SI, (CX)
+	ADDQ $0x02, CX
 	JMP  repeat_end_emit_encodeBlockAsm8B
 
 repeat_as_copy_encodeBlockAsm8B:
 	// emitCopy
-	CMPL BX, $0x40
+	CMPL SI, $0x40
 	JBE  two_byte_offset_short_repeat_as_copy_encodeBlockAsm8B
-	CMPL SI, $0x00000800
+	CMPL DI, $0x00000800
 	JAE  long_offset_short_repeat_as_copy_encodeBlockAsm8B
-	MOVL $0x00000001, DI
-	LEAL 16(DI), DI
-	MOVB SI, 1(AX)
-	SHRL $0x08, SI
-	SHLL $0x05, SI
-	ORL  SI, DI
-	MOVB DI, (AX)
-	ADDQ $0x02, AX
-	SUBL $0x08, BX
+	MOVL $0x00000001, R8
+	LEAL 16(R8), R8
+	MOVB DI, 1(CX)
+	SHRL $0x08, DI
+	SHLL $0x05, DI
+	ORL  DI, R8
+	MOVB R8, (CX)
+	ADDQ $0x02, CX
+	SUBL $0x08, SI
 
 	// emitRepeat
-	LEAL -4(BX), BX
+	LEAL -4(SI), SI
 	JMP  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b
-	MOVL BX, SI
-	LEAL -4(BX), BX
-	CMPL SI, $0x08
+	MOVL SI, DI
+	LEAL -4(SI), SI
+	CMPL DI, $0x08
 	JBE  repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b
-	CMPL SI, $0x0c
+	CMPL DI, $0x0c
 	JAE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b
 
 cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b:
-	CMPL BX, $0x00000104
+	CMPL SI, $0x00000104
 	JB   repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b
-	LEAL -256(BX), BX
-	MOVW $0x0019, (AX)
-	MOVW BX, 2(AX)
-	ADDQ $0x04, AX
+	LEAL -256(SI), SI
+	MOVW $0x0019, (CX)
+	MOVW SI, 2(CX)
+	ADDQ $0x04, CX
 	JMP  repeat_end_emit_encodeBlockAsm8B
 
 repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b:
-	LEAL -4(BX), BX
-	MOVW $0x0015, (AX)
-	MOVB BL, 2(AX)
-	ADDQ $0x03, AX
+	LEAL -4(SI), SI
+	MOVW $0x0015, (CX)
+	MOVB SI, 2(CX)
+	ADDQ $0x03, CX
 	JMP  repeat_end_emit_encodeBlockAsm8B
 
 repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short_2b:
-	SHLL $0x02, BX
-	ORL  $0x01, BX
-	MOVW BX, (AX)
-	ADDQ $0x02, AX
+	SHLL $0x02, SI
+	ORL  $0x01, SI
+	MOVW SI, (CX)
+	ADDQ $0x02, CX
 	JMP  repeat_end_emit_encodeBlockAsm8B
-	XORQ DI, DI
-	LEAL 1(DI)(BX*4), BX
-	MOVB SI, 1(AX)
-	SARL $0x08, SI
-	SHLL $0x05, SI
-	ORL  SI, BX
-	MOVB BL, (AX)
-	ADDQ $0x02, AX
+	XORQ R8, R8
+	LEAL 1(R8)(SI*4), SI
+	MOVB DI, 1(CX)
+	SARL $0x08, DI
+	SHLL $0x05, DI
+	ORL  DI, SI
+	MOVB SI, (CX)
+	ADDQ $0x02, CX
 	JMP  repeat_end_emit_encodeBlockAsm8B
 
 long_offset_short_repeat_as_copy_encodeBlockAsm8B:
-	MOVB $0xee, (AX)
-	MOVW SI, 1(AX)
-	LEAL -60(BX), BX
-	ADDQ $0x03, AX
+	MOVB $0xee, (CX)
+	MOVW DI, 1(CX)
+	LEAL -60(SI), SI
+	ADDQ $0x03, CX
 
 	// emitRepeat
-	MOVL BX, SI
-	LEAL -4(BX), BX
-	CMPL SI, $0x08
+	MOVL SI, DI
+	LEAL -4(SI), SI
+	CMPL DI, $0x08
 	JBE  repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short
-	CMPL SI, $0x0c
+	CMPL DI, $0x0c
 	JAE  cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short
 
 cant_repeat_two_offset_repeat_as_copy_encodeBlockAsm8B_emit_copy_short:
-	CMPL BX, $0x00000104
+	CMPL SI, $0x00000104
 	JB   repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short
-	LEAL -256(BX), BX
-	MOVW $0x0019, (AX)
-	MOVW BX, 2(AX)
-	ADDQ $0x04, AX
+	LEAL -256(SI), SI
+	MOVW $0x0019, (CX)
+	MOVW SI, 2(CX)
+	ADDQ $0x04, CX
 	JMP  repeat_end_emit_encodeBlockAsm8B
 
 repeat_three_repeat_as_copy_encodeBlockAsm8B_emit_copy_short:
-	LEAL -4(BX), BX
-	MOVW $0x0015, (AX)
-	MOVB BL, 2(AX)
-	ADDQ $0x03, AX
+	LEAL -4(SI), SI
+	MOVW $0x0015, (CX)
+	MOVB SI, 2(CX)
+	ADDQ $0x03, CX
 	JMP  repeat_end_emit_encodeBlockAsm8B
 
 repeat_two_repeat_as_copy_encodeBlockAsm8B_emit_copy_short:
-	SHLL $0x02, BX
-	ORL  $0x01, BX
-	MOVW BX, (AX)
-	ADDQ $0x02, AX
+	SHLL $0x02, SI
+	ORL  $0x01, SI
+	MOVW SI, (CX)
+	ADDQ $0x02, CX
 	JMP  repeat_end_emit_encodeBlockAsm8B
-	XORQ DI, DI
-	LEAL 1(DI)(BX*4), BX
-	MOVB SI, 1(AX)
-	SARL $0x08, SI
-	SHLL $0x05, SI
-	ORL  SI, BX
-	MOVB BL, (AX)
-	ADDQ $0x02, AX
+	XORQ R8, R8
+	LEAL 1(R8)(SI*4), SI
+	MOVB DI, 1(CX)
+	SARL $0x08, DI
+	SHLL $0x05, DI
+	ORL  DI, SI
+	MOVB SI, (CX)
+	ADDQ $0x02, CX
 	JMP  repeat_end_emit_encodeBlockAsm8B
 
 two_byte_offset_short_repeat_as_copy_encodeBlockAsm8B:
-	MOVL BX, DI
-	SHLL $0x02, DI
-	CMPL BX, $0x0c
+	MOVL SI, R8
+	SHLL $0x02, R8
+	CMPL SI, $0x0c
 	JAE  emit_copy_three_repeat_as_copy_encodeBlockAsm8B
-	LEAL -15(DI), DI
-	MOVB SI, 1(AX)
-	SHRL $0x08, SI
-	SHLL $0x05, SI
-	ORL  SI, DI
-	MOVB DI, (AX)
-	ADDQ $0x02, AX
+	LEAL -15(R8), R8
+	MOVB DI, 1(CX)
+	SHRL $0x08, DI
+	SHLL $0x05, DI
+	ORL  DI, R8
+	MOVB R8, (CX)
+	ADDQ $0x02, CX
 	JMP  repeat_end_emit_encodeBlockAsm8B
 
 emit_copy_three_repeat_as_copy_encodeBlockAsm8B:
-	LEAL -2(DI), DI
-	MOVB DI, (AX)
-	MOVW SI, 1(AX)
-	ADDQ $0x03, AX
+	LEAL -2(R8), R8
+	MOVB R8, (CX)
+	MOVW DI, 1(CX)
+	ADDQ $0x03, CX
 
 repeat_end_emit_encodeBlockAsm8B:
-	MOVL CX, 12(SP)
+	MOVL DX, 12(SP)
 	JMP  search_loop_encodeBlockAsm8B
 
 no_repeat_found_encodeBlockAsm8B:
-	CMPL (DX)(BX*1), SI
+	CMPL (BX)(SI*1), DI
 	JEQ  candidate_match_encodeBlockAsm8B
-	SHRQ $0x08, SI
-	MOVL 24(SP)(R9*4), BX
-	LEAL 2(CX), R8
-	CMPL (DX)(DI*1), SI
+	SHRQ $0x08, DI
+	MOVL (AX)(R10*4), SI
+	LEAL 2(DX), R9
+	CMPL (BX)(R8*1), DI
 	JEQ  candidate2_match_encodeBlockAsm8B
-	MOVL R8, 24(SP)(R9*4)
-	SHRQ $0x08, SI
-	CMPL (DX)(BX*1), SI
+	MOVL R9, (AX)(R10*4)
+	SHRQ $0x08, DI
+	CMPL (BX)(SI*1), DI
 	JEQ  candidate3_match_encodeBlockAsm8B
-	MOVL 20(SP), CX
+	MOVL 20(SP), DX
 	JMP  search_loop_encodeBlockAsm8B
 
 candidate3_match_encodeBlockAsm8B:
-	ADDL $0x02, CX
+	ADDL $0x02, DX
 	JMP  candidate_match_encodeBlockAsm8B
 
 candidate2_match_encodeBlockAsm8B:
-	MOVL R8, 24(SP)(R9*4)
-	INCL CX
-	MOVL DI, BX
+	MOVL R9, (AX)(R10*4)
+	INCL DX
+	MOVL R8, SI
 
 candidate_match_encodeBlockAsm8B:
-	MOVL  12(SP), SI
-	TESTL BX, BX
+	MOVL  12(SP), DI
+	TESTL SI, SI
 	JZ    match_extend_back_end_encodeBlockAsm8B
 
 match_extend_back_loop_encodeBlockAsm8B:
-	CMPL CX, SI
+	CMPL DX, DI
 	JBE  match_extend_back_end_encodeBlockAsm8B
-	MOVB -1(DX)(BX*1), DI
-	MOVB -1(DX)(CX*1), R8
-	CMPB DI, R8
+	MOVB -1(BX)(SI*1), R8
+	MOVB -1(BX)(DX*1), R9
+	CMPB R8, R9
 	JNE  match_extend_back_end_encodeBlockAsm8B
-	LEAL -1(CX), CX
-	DECL BX
+	LEAL -1(DX), DX
+	DECL SI
 	JZ   match_extend_back_end_encodeBlockAsm8B
 	JMP  match_extend_back_loop_encodeBlockAsm8B
 
 match_extend_back_end_encodeBlockAsm8B:
-	MOVL CX, SI
-	SUBL 12(SP), SI
-	LEAQ 3(AX)(SI*1), SI
-	CMPQ SI, (SP)
+	MOVL DX, DI
+	SUBL 12(SP), DI
+	LEAQ 3(CX)(DI*1), DI
+	CMPQ DI, (SP)
 	JB   match_dst_size_check_encodeBlockAsm8B
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 match_dst_size_check_encodeBlockAsm8B:
-	MOVL CX, SI
-	MOVL 12(SP), DI
-	CMPL DI, SI
+	MOVL DX, DI
+	MOVL 12(SP), R8
+	CMPL R8, DI
 	JEQ  emit_literal_done_match_emit_encodeBlockAsm8B
-	MOVL SI, R8
-	MOVL SI, 12(SP)
-	LEAQ (DX)(DI*1), SI
-	SUBL DI, R8
-	LEAL -1(R8), DI
-	CMPL DI, $0x3c
+	MOVL DI, R9
+	MOVL DI, 12(SP)
+	LEAQ (BX)(R8*1), DI
+	SUBL R8, R9
+	LEAL -1(R9), R8
+	CMPL R8, $0x3c
 	JB   one_byte_match_emit_encodeBlockAsm8B
-	CMPL DI, $0x00000100
+	CMPL R8, $0x00000100
 	JB   two_bytes_match_emit_encodeBlockAsm8B
 	JB   three_bytes_match_emit_encodeBlockAsm8B
 
 three_bytes_match_emit_encodeBlockAsm8B:
-	MOVB $0xf4, (AX)
-	MOVW DI, 1(AX)
-	ADDQ $0x03, AX
+	MOVB $0xf4, (CX)
+	MOVW R8, 1(CX)
+	ADDQ $0x03, CX
 	JMP  memmove_long_match_emit_encodeBlockAsm8B
 
 two_bytes_match_emit_encodeBlockAsm8B:
-	MOVB $0xf0, (AX)
-	MOVB DI, 1(AX)
-	ADDQ $0x02, AX
-	CMPL DI, $0x40
+	MOVB $0xf0, (CX)
+	MOVB R8, 1(CX)
+	ADDQ $0x02, CX
+	CMPL R8, $0x40
 	JB   memmove_match_emit_encodeBlockAsm8B
 	JMP  memmove_long_match_emit_encodeBlockAsm8B
 
 one_byte_match_emit_encodeBlockAsm8B:
-	SHLB $0x02, DI
-	MOVB DI, (AX)
-	ADDQ $0x01, AX
+	SHLB $0x02, R8
+	MOVB R8, (CX)
+	ADDQ $0x01, CX
 
 memmove_match_emit_encodeBlockAsm8B:
-	LEAQ (AX)(R8*1), DI
+	LEAQ (CX)(R9*1), R8
 
 	// genMemMoveShort
-	CMPQ R8, $0x08
+	CMPQ R9, $0x08
 	JBE  emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8
-	CMPQ R8, $0x10
+	CMPQ R9, $0x10
 	JBE  emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8through16
-	CMPQ R8, $0x20
+	CMPQ R9, $0x20
 	JBE  emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_17through32
 	JMP  emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_33through64
 
 emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8:
-	MOVQ (SI), R9
-	MOVQ R9, (AX)
+	MOVQ (DI), R10
+	MOVQ R10, (CX)
 	JMP  memmove_end_copy_match_emit_encodeBlockAsm8B
 
 emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_8through16:
-	MOVQ (SI), R9
-	MOVQ -8(SI)(R8*1), SI
-	MOVQ R9, (AX)
-	MOVQ SI, -8(AX)(R8*1)
+	MOVQ (DI), R10
+	MOVQ -8(DI)(R9*1), DI
+	MOVQ R10, (CX)
+	MOVQ DI, -8(CX)(R9*1)
 	JMP  memmove_end_copy_match_emit_encodeBlockAsm8B
 
 emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_17through32:
-	MOVOU (SI), X0
-	MOVOU -16(SI)(R8*1), X1
-	MOVOU X0, (AX)
-	MOVOU X1, -16(AX)(R8*1)
+	MOVOU (DI), X0
+	MOVOU -16(DI)(R9*1), X1
+	MOVOU X0, (CX)
+	MOVOU X1, -16(CX)(R9*1)
 	JMP   memmove_end_copy_match_emit_encodeBlockAsm8B
 
 emit_lit_memmove_match_emit_encodeBlockAsm8B_memmove_move_33through64:
-	MOVOU (SI), X0
-	MOVOU 16(SI), X1
-	MOVOU -32(SI)(R8*1), X2
-	MOVOU -16(SI)(R8*1), X3
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(R8*1)
-	MOVOU X3, -16(AX)(R8*1)
+	MOVOU (DI), X0
+	MOVOU 16(DI), X1
+	MOVOU -32(DI)(R9*1), X2
+	MOVOU -16(DI)(R9*1), X3
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R9*1)
+	MOVOU X3, -16(CX)(R9*1)
 
 memmove_end_copy_match_emit_encodeBlockAsm8B:
-	MOVQ DI, AX
+	MOVQ R8, CX
 	JMP  emit_literal_done_match_emit_encodeBlockAsm8B
 
 memmove_long_match_emit_encodeBlockAsm8B:
-	LEAQ (AX)(R8*1), DI
+	LEAQ (CX)(R9*1), R8
 
 	// genMemMoveLong
-	MOVOU (SI), X0
-	MOVOU 16(SI), X1
-	MOVOU -32(SI)(R8*1), X2
-	MOVOU -16(SI)(R8*1), X3
-	MOVQ  R8, R10
-	SHRQ  $0x05, R10
-	MOVQ  AX, R9
-	ANDL  $0x0000001f, R9
-	MOVQ  $0x00000040, R11
-	SUBQ  R9, R11
-	DECQ  R10
+	MOVOU (DI), X0
+	MOVOU 16(DI), X1
+	MOVOU -32(DI)(R9*1), X2
+	MOVOU -16(DI)(R9*1), X3
+	MOVQ  R9, R11
+	SHRQ  $0x05, R11
+	MOVQ  CX, R10
+	ANDL  $0x0000001f, R10
+	MOVQ  $0x00000040, R12
+	SUBQ  R10, R12
+	DECQ  R11
 	JA    emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32
-	LEAQ  -32(SI)(R11*1), R9
-	LEAQ  -32(AX)(R11*1), R12
+	LEAQ  -32(DI)(R12*1), R10
+	LEAQ  -32(CX)(R12*1), R13
 
 emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_big_loop_back:
-	MOVOU (R9), X4
-	MOVOU 16(R9), X5
-	MOVOA X4, (R12)
-	MOVOA X5, 16(R12)
-	ADDQ  $0x20, R12
-	ADDQ  $0x20, R9
-	ADDQ  $0x20, R11
-	DECQ  R10
+	MOVOU (R10), X4
+	MOVOU 16(R10), X5
+	MOVOA X4, (R13)
+	MOVOA X5, 16(R13)
+	ADDQ  $0x20, R13
+	ADDQ  $0x20, R10
+	ADDQ  $0x20, R12
+	DECQ  R11
 	JNA   emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_big_loop_back
 
 emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32:
-	MOVOU -32(SI)(R11*1), X4
-	MOVOU -16(SI)(R11*1), X5
-	MOVOA X4, -32(AX)(R11*1)
-	MOVOA X5, -16(AX)(R11*1)
-	ADDQ  $0x20, R11
-	CMPQ  R8, R11
+	MOVOU -32(DI)(R12*1), X4
+	MOVOU -16(DI)(R12*1), X5
+	MOVOA X4, -32(CX)(R12*1)
+	MOVOA X5, -16(CX)(R12*1)
+	ADDQ  $0x20, R12
+	CMPQ  R9, R12
 	JAE   emit_lit_memmove_long_match_emit_encodeBlockAsm8Blarge_forward_sse_loop_32
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(R8*1)
-	MOVOU X3, -16(AX)(R8*1)
-	MOVQ  DI, AX
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R9*1)
+	MOVOU X3, -16(CX)(R9*1)
+	MOVQ  R8, CX
 
 emit_literal_done_match_emit_encodeBlockAsm8B:
 match_nolit_loop_encodeBlockAsm8B:
-	MOVL CX, SI
-	SUBL BX, SI
-	MOVL SI, 16(SP)
-	ADDL $0x04, CX
-	ADDL $0x04, BX
-	MOVQ src_len+32(FP), SI
-	SUBL CX, SI
-	LEAQ (DX)(CX*1), DI
-	LEAQ (DX)(BX*1), BX
+	MOVL DX, DI
+	SUBL SI, DI
+	MOVL DI, 16(SP)
+	ADDL $0x04, DX
+	ADDL $0x04, SI
+	MOVQ src_len+32(FP), DI
+	SUBL DX, DI
+	LEAQ (BX)(DX*1), R8
+	LEAQ (BX)(SI*1), SI
 
 	// matchLen
-	XORL R9, R9
+	XORL R10, R10
 
 matchlen_loopback_16_match_nolit_encodeBlockAsm8B:
-	CMPL SI, $0x10
+	CMPL DI, $0x10
 	JB   matchlen_match8_match_nolit_encodeBlockAsm8B
-	MOVQ (DI)(R9*1), R8
-	MOVQ 8(DI)(R9*1), R10
-	XORQ (BX)(R9*1), R8
+	MOVQ (R8)(R10*1), R9
+	MOVQ 8(R8)(R10*1), R11
+	XORQ (SI)(R10*1), R9
 	JNZ  matchlen_bsf_8_match_nolit_encodeBlockAsm8B
-	XORQ 8(BX)(R9*1), R10
+	XORQ 8(SI)(R10*1), R11
 	JNZ  matchlen_bsf_16match_nolit_encodeBlockAsm8B
-	LEAL -16(SI), SI
-	LEAL 16(R9), R9
+	LEAL -16(DI), DI
+	LEAL 16(R10), R10
 	JMP  matchlen_loopback_16_match_nolit_encodeBlockAsm8B
 
 matchlen_bsf_16match_nolit_encodeBlockAsm8B:
 #ifdef GOAMD64_v3
-	TZCNTQ R10, R10
+	TZCNTQ R11, R11
 
 #else
-	BSFQ R10, R10
+	BSFQ R11, R11
 
 #endif
-	SARQ $0x03, R10
-	LEAL 8(R9)(R10*1), R9
+	SARQ $0x03, R11
+	LEAL 8(R10)(R11*1), R10
 	JMP  match_nolit_end_encodeBlockAsm8B
 
 matchlen_match8_match_nolit_encodeBlockAsm8B:
-	CMPL SI, $0x08
+	CMPL DI, $0x08
 	JB   matchlen_match4_match_nolit_encodeBlockAsm8B
-	MOVQ (DI)(R9*1), R8
-	XORQ (BX)(R9*1), R8
+	MOVQ (R8)(R10*1), R9
+	XORQ (SI)(R10*1), R9
 	JNZ  matchlen_bsf_8_match_nolit_encodeBlockAsm8B
-	LEAL -8(SI), SI
-	LEAL 8(R9), R9
+	LEAL -8(DI), DI
+	LEAL 8(R10), R10
 	JMP  matchlen_match4_match_nolit_encodeBlockAsm8B
 
 matchlen_bsf_8_match_nolit_encodeBlockAsm8B:
 #ifdef GOAMD64_v3
-	TZCNTQ R8, R8
+	TZCNTQ R9, R9
 
 #else
-	BSFQ R8, R8
+	BSFQ R9, R9
 
 #endif
-	SARQ $0x03, R8
-	LEAL (R9)(R8*1), R9
+	SARQ $0x03, R9
+	LEAL (R10)(R9*1), R10
 	JMP  match_nolit_end_encodeBlockAsm8B
 
 matchlen_match4_match_nolit_encodeBlockAsm8B:
-	CMPL SI, $0x04
+	CMPL DI, $0x04
 	JB   matchlen_match2_match_nolit_encodeBlockAsm8B
-	MOVL (DI)(R9*1), R8
-	CMPL (BX)(R9*1), R8
+	MOVL (R8)(R10*1), R9
+	CMPL (SI)(R10*1), R9
 	JNE  matchlen_match2_match_nolit_encodeBlockAsm8B
-	LEAL -4(SI), SI
-	LEAL 4(R9), R9
+	LEAL -4(DI), DI
+	LEAL 4(R10), R10
 
 matchlen_match2_match_nolit_encodeBlockAsm8B:
-	CMPL SI, $0x01
+	CMPL DI, $0x01
 	JE   matchlen_match1_match_nolit_encodeBlockAsm8B
 	JB   match_nolit_end_encodeBlockAsm8B
-	MOVW (DI)(R9*1), R8
-	CMPW (BX)(R9*1), R8
+	MOVW (R8)(R10*1), R9
+	CMPW (SI)(R10*1), R9
 	JNE  matchlen_match1_match_nolit_encodeBlockAsm8B
-	LEAL 2(R9), R9
-	SUBL $0x02, SI
+	LEAL 2(R10), R10
+	SUBL $0x02, DI
 	JZ   match_nolit_end_encodeBlockAsm8B
 
 matchlen_match1_match_nolit_encodeBlockAsm8B:
-	MOVB (DI)(R9*1), R8
-	CMPB (BX)(R9*1), R8
+	MOVB (R8)(R10*1), R9
+	CMPB (SI)(R10*1), R9
 	JNE  match_nolit_end_encodeBlockAsm8B
-	LEAL 1(R9), R9
+	LEAL 1(R10), R10
 
 match_nolit_end_encodeBlockAsm8B:
-	ADDL R9, CX
-	MOVL 16(SP), BX
-	ADDL $0x04, R9
-	MOVL CX, 12(SP)
+	ADDL R10, DX
+	MOVL 16(SP), SI
+	ADDL $0x04, R10
+	MOVL DX, 12(SP)
 
 	// emitCopy
-	CMPL R9, $0x40
+	CMPL R10, $0x40
 	JBE  two_byte_offset_short_match_nolit_encodeBlockAsm8B
-	CMPL BX, $0x00000800
+	CMPL SI, $0x00000800
 	JAE  long_offset_short_match_nolit_encodeBlockAsm8B
-	MOVL $0x00000001, SI
-	LEAL 16(SI), SI
-	MOVB BL, 1(AX)
-	SHRL $0x08, BX
-	SHLL $0x05, BX
-	ORL  BX, SI
-	MOVB SI, (AX)
-	ADDQ $0x02, AX
-	SUBL $0x08, R9
+	MOVL $0x00000001, DI
+	LEAL 16(DI), DI
+	MOVB SI, 1(CX)
+	SHRL $0x08, SI
+	SHLL $0x05, SI
+	ORL  SI, DI
+	MOVB DI, (CX)
+	ADDQ $0x02, CX
+	SUBL $0x08, R10
 
 	// emitRepeat
-	LEAL -4(R9), R9
+	LEAL -4(R10), R10
 	JMP  cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short_2b
-	MOVL R9, BX
-	LEAL -4(R9), R9
-	CMPL BX, $0x08
+	MOVL R10, SI
+	LEAL -4(R10), R10
+	CMPL SI, $0x08
 	JBE  repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short_2b
-	CMPL BX, $0x0c
+	CMPL SI, $0x0c
 	JAE  cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short_2b
 
 cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short_2b:
-	CMPL R9, $0x00000104
+	CMPL R10, $0x00000104
 	JB   repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short_2b
-	LEAL -256(R9), R9
-	MOVW $0x0019, (AX)
-	MOVW R9, 2(AX)
-	ADDQ $0x04, AX
+	LEAL -256(R10), R10
+	MOVW $0x0019, (CX)
+	MOVW R10, 2(CX)
+	ADDQ $0x04, CX
 	JMP  match_nolit_emitcopy_end_encodeBlockAsm8B
 
 repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short_2b:
-	LEAL -4(R9), R9
-	MOVW $0x0015, (AX)
-	MOVB R9, 2(AX)
-	ADDQ $0x03, AX
+	LEAL -4(R10), R10
+	MOVW $0x0015, (CX)
+	MOVB R10, 2(CX)
+	ADDQ $0x03, CX
 	JMP  match_nolit_emitcopy_end_encodeBlockAsm8B
 
 repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short_2b:
-	SHLL $0x02, R9
-	ORL  $0x01, R9
-	MOVW R9, (AX)
-	ADDQ $0x02, AX
+	SHLL $0x02, R10
+	ORL  $0x01, R10
+	MOVW R10, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeBlockAsm8B
-	XORQ SI, SI
-	LEAL 1(SI)(R9*4), R9
-	MOVB BL, 1(AX)
-	SARL $0x08, BX
-	SHLL $0x05, BX
-	ORL  BX, R9
-	MOVB R9, (AX)
-	ADDQ $0x02, AX
+	XORQ DI, DI
+	LEAL 1(DI)(R10*4), R10
+	MOVB SI, 1(CX)
+	SARL $0x08, SI
+	SHLL $0x05, SI
+	ORL  SI, R10
+	MOVB R10, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeBlockAsm8B
 
 long_offset_short_match_nolit_encodeBlockAsm8B:
-	MOVB $0xee, (AX)
-	MOVW BX, 1(AX)
-	LEAL -60(R9), R9
-	ADDQ $0x03, AX
+	MOVB $0xee, (CX)
+	MOVW SI, 1(CX)
+	LEAL -60(R10), R10
+	ADDQ $0x03, CX
 
 	// emitRepeat
-	MOVL R9, BX
-	LEAL -4(R9), R9
-	CMPL BX, $0x08
+	MOVL R10, SI
+	LEAL -4(R10), R10
+	CMPL SI, $0x08
 	JBE  repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short
-	CMPL BX, $0x0c
+	CMPL SI, $0x0c
 	JAE  cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short
 
 cant_repeat_two_offset_match_nolit_encodeBlockAsm8B_emit_copy_short:
-	CMPL R9, $0x00000104
+	CMPL R10, $0x00000104
 	JB   repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short
-	LEAL -256(R9), R9
-	MOVW $0x0019, (AX)
-	MOVW R9, 2(AX)
-	ADDQ $0x04, AX
+	LEAL -256(R10), R10
+	MOVW $0x0019, (CX)
+	MOVW R10, 2(CX)
+	ADDQ $0x04, CX
 	JMP  match_nolit_emitcopy_end_encodeBlockAsm8B
 
 repeat_three_match_nolit_encodeBlockAsm8B_emit_copy_short:
-	LEAL -4(R9), R9
-	MOVW $0x0015, (AX)
-	MOVB R9, 2(AX)
-	ADDQ $0x03, AX
+	LEAL -4(R10), R10
+	MOVW $0x0015, (CX)
+	MOVB R10, 2(CX)
+	ADDQ $0x03, CX
 	JMP  match_nolit_emitcopy_end_encodeBlockAsm8B
 
 repeat_two_match_nolit_encodeBlockAsm8B_emit_copy_short:
-	SHLL $0x02, R9
-	ORL  $0x01, R9
-	MOVW R9, (AX)
-	ADDQ $0x02, AX
+	SHLL $0x02, R10
+	ORL  $0x01, R10
+	MOVW R10, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeBlockAsm8B
-	XORQ SI, SI
-	LEAL 1(SI)(R9*4), R9
-	MOVB BL, 1(AX)
-	SARL $0x08, BX
-	SHLL $0x05, BX
-	ORL  BX, R9
-	MOVB R9, (AX)
-	ADDQ $0x02, AX
+	XORQ DI, DI
+	LEAL 1(DI)(R10*4), R10
+	MOVB SI, 1(CX)
+	SARL $0x08, SI
+	SHLL $0x05, SI
+	ORL  SI, R10
+	MOVB R10, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeBlockAsm8B
 
 two_byte_offset_short_match_nolit_encodeBlockAsm8B:
-	MOVL R9, SI
-	SHLL $0x02, SI
-	CMPL R9, $0x0c
+	MOVL R10, DI
+	SHLL $0x02, DI
+	CMPL R10, $0x0c
 	JAE  emit_copy_three_match_nolit_encodeBlockAsm8B
-	LEAL -15(SI), SI
-	MOVB BL, 1(AX)
-	SHRL $0x08, BX
-	SHLL $0x05, BX
-	ORL  BX, SI
-	MOVB SI, (AX)
-	ADDQ $0x02, AX
+	LEAL -15(DI), DI
+	MOVB SI, 1(CX)
+	SHRL $0x08, SI
+	SHLL $0x05, SI
+	ORL  SI, DI
+	MOVB DI, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeBlockAsm8B
 
 emit_copy_three_match_nolit_encodeBlockAsm8B:
-	LEAL -2(SI), SI
-	MOVB SI, (AX)
-	MOVW BX, 1(AX)
-	ADDQ $0x03, AX
+	LEAL -2(DI), DI
+	MOVB DI, (CX)
+	MOVW SI, 1(CX)
+	ADDQ $0x03, CX
 
 match_nolit_emitcopy_end_encodeBlockAsm8B:
-	CMPL CX, 8(SP)
+	CMPL DX, 8(SP)
 	JAE  emit_remainder_encodeBlockAsm8B
-	MOVQ -2(DX)(CX*1), SI
-	CMPQ AX, (SP)
+	MOVQ -2(BX)(DX*1), DI
+	CMPQ CX, (SP)
 	JB   match_nolit_dst_ok_encodeBlockAsm8B
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 match_nolit_dst_ok_encodeBlockAsm8B:
-	MOVQ  $0x9e3779b1, R8
-	MOVQ  SI, DI
-	SHRQ  $0x10, SI
-	MOVQ  SI, BX
-	SHLQ  $0x20, DI
-	IMULQ R8, DI
-	SHRQ  $0x38, DI
-	SHLQ  $0x20, BX
-	IMULQ R8, BX
-	SHRQ  $0x38, BX
-	LEAL  -2(CX), R8
-	LEAQ  24(SP)(BX*4), R9
-	MOVL  (R9), BX
-	MOVL  R8, 24(SP)(DI*4)
-	MOVL  CX, (R9)
-	CMPL  (DX)(BX*1), SI
+	MOVQ  $0x9e3779b1, R9
+	MOVQ  DI, R8
+	SHRQ  $0x10, DI
+	MOVQ  DI, SI
+	SHLQ  $0x20, R8
+	IMULQ R9, R8
+	SHRQ  $0x38, R8
+	SHLQ  $0x20, SI
+	IMULQ R9, SI
+	SHRQ  $0x38, SI
+	LEAL  -2(DX), R9
+	LEAQ  (AX)(SI*4), R10
+	MOVL  (R10), SI
+	MOVL  R9, (AX)(R8*4)
+	MOVL  DX, (R10)
+	CMPL  (BX)(SI*1), DI
 	JEQ   match_nolit_loop_encodeBlockAsm8B
-	INCL  CX
+	INCL  DX
 	JMP   search_loop_encodeBlockAsm8B
 
 emit_remainder_encodeBlockAsm8B:
-	MOVQ src_len+32(FP), CX
-	SUBL 12(SP), CX
-	LEAQ 3(AX)(CX*1), CX
-	CMPQ CX, (SP)
+	MOVQ src_len+32(FP), AX
+	SUBL 12(SP), AX
+	LEAQ 3(CX)(AX*1), AX
+	CMPQ AX, (SP)
 	JB   emit_remainder_ok_encodeBlockAsm8B
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 emit_remainder_ok_encodeBlockAsm8B:
-	MOVQ src_len+32(FP), CX
-	MOVL 12(SP), BX
-	CMPL BX, CX
+	MOVQ src_len+32(FP), AX
+	MOVL 12(SP), DX
+	CMPL DX, AX
 	JEQ  emit_literal_done_emit_remainder_encodeBlockAsm8B
-	MOVL CX, SI
-	MOVL CX, 12(SP)
-	LEAQ (DX)(BX*1), CX
-	SUBL BX, SI
+	MOVL AX, SI
+	MOVL AX, 12(SP)
+	LEAQ (BX)(DX*1), AX
+	SUBL DX, SI
 	LEAL -1(SI), DX
 	CMPL DX, $0x3c
 	JB   one_byte_emit_remainder_encodeBlockAsm8B
@@ -5853,26 +5858,26 @@ emit_remainder_ok_encodeBlockAsm8B:
 	JB   three_bytes_emit_remainder_encodeBlockAsm8B
 
 three_bytes_emit_remainder_encodeBlockAsm8B:
-	MOVB $0xf4, (AX)
-	MOVW DX, 1(AX)
-	ADDQ $0x03, AX
+	MOVB $0xf4, (CX)
+	MOVW DX, 1(CX)
+	ADDQ $0x03, CX
 	JMP  memmove_long_emit_remainder_encodeBlockAsm8B
 
 two_bytes_emit_remainder_encodeBlockAsm8B:
-	MOVB $0xf0, (AX)
-	MOVB DL, 1(AX)
-	ADDQ $0x02, AX
+	MOVB $0xf0, (CX)
+	MOVB DL, 1(CX)
+	ADDQ $0x02, CX
 	CMPL DX, $0x40
 	JB   memmove_emit_remainder_encodeBlockAsm8B
 	JMP  memmove_long_emit_remainder_encodeBlockAsm8B
 
 one_byte_emit_remainder_encodeBlockAsm8B:
 	SHLB $0x02, DL
-	MOVB DL, (AX)
-	ADDQ $0x01, AX
+	MOVB DL, (CX)
+	ADDQ $0x01, CX
 
 memmove_emit_remainder_encodeBlockAsm8B:
-	LEAQ (AX)(SI*1), DX
+	LEAQ (CX)(SI*1), DX
 	MOVL SI, BX
 
 	// genMemMoveShort
@@ -5888,73 +5893,73 @@ memmove_emit_remainder_encodeBlockAsm8B:
 	JMP  emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_33through64
 
 emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_1or2:
-	MOVB (CX), SI
-	MOVB -1(CX)(BX*1), CL
-	MOVB SI, (AX)
-	MOVB CL, -1(AX)(BX*1)
+	MOVB (AX), SI
+	MOVB -1(AX)(BX*1), AL
+	MOVB SI, (CX)
+	MOVB AL, -1(CX)(BX*1)
 	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm8B
 
 emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_3:
-	MOVW (CX), SI
-	MOVB 2(CX), CL
-	MOVW SI, (AX)
-	MOVB CL, 2(AX)
+	MOVW (AX), SI
+	MOVB 2(AX), AL
+	MOVW SI, (CX)
+	MOVB AL, 2(CX)
 	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm8B
 
 emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_4through7:
-	MOVL (CX), SI
-	MOVL -4(CX)(BX*1), CX
-	MOVL SI, (AX)
-	MOVL CX, -4(AX)(BX*1)
+	MOVL (AX), SI
+	MOVL -4(AX)(BX*1), AX
+	MOVL SI, (CX)
+	MOVL AX, -4(CX)(BX*1)
 	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm8B
 
 emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_8through16:
-	MOVQ (CX), SI
-	MOVQ -8(CX)(BX*1), CX
-	MOVQ SI, (AX)
-	MOVQ CX, -8(AX)(BX*1)
+	MOVQ (AX), SI
+	MOVQ -8(AX)(BX*1), AX
+	MOVQ SI, (CX)
+	MOVQ AX, -8(CX)(BX*1)
 	JMP  memmove_end_copy_emit_remainder_encodeBlockAsm8B
 
 emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_17through32:
-	MOVOU (CX), X0
-	MOVOU -16(CX)(BX*1), X1
-	MOVOU X0, (AX)
-	MOVOU X1, -16(AX)(BX*1)
+	MOVOU (AX), X0
+	MOVOU -16(AX)(BX*1), X1
+	MOVOU X0, (CX)
+	MOVOU X1, -16(CX)(BX*1)
 	JMP   memmove_end_copy_emit_remainder_encodeBlockAsm8B
 
 emit_lit_memmove_emit_remainder_encodeBlockAsm8B_memmove_move_33through64:
-	MOVOU (CX), X0
-	MOVOU 16(CX), X1
-	MOVOU -32(CX)(BX*1), X2
-	MOVOU -16(CX)(BX*1), X3
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(BX*1)
-	MOVOU X3, -16(AX)(BX*1)
+	MOVOU (AX), X0
+	MOVOU 16(AX), X1
+	MOVOU -32(AX)(BX*1), X2
+	MOVOU -16(AX)(BX*1), X3
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(BX*1)
+	MOVOU X3, -16(CX)(BX*1)
 
 memmove_end_copy_emit_remainder_encodeBlockAsm8B:
-	MOVQ DX, AX
+	MOVQ DX, CX
 	JMP  emit_literal_done_emit_remainder_encodeBlockAsm8B
 
 memmove_long_emit_remainder_encodeBlockAsm8B:
-	LEAQ (AX)(SI*1), DX
+	LEAQ (CX)(SI*1), DX
 	MOVL SI, BX
 
 	// genMemMoveLong
-	MOVOU (CX), X0
-	MOVOU 16(CX), X1
-	MOVOU -32(CX)(BX*1), X2
-	MOVOU -16(CX)(BX*1), X3
+	MOVOU (AX), X0
+	MOVOU 16(AX), X1
+	MOVOU -32(AX)(BX*1), X2
+	MOVOU -16(AX)(BX*1), X3
 	MOVQ  BX, DI
 	SHRQ  $0x05, DI
-	MOVQ  AX, SI
+	MOVQ  CX, SI
 	ANDL  $0x0000001f, SI
 	MOVQ  $0x00000040, R8
 	SUBQ  SI, R8
 	DECQ  DI
 	JA    emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_forward_sse_loop_32
-	LEAQ  -32(CX)(R8*1), SI
-	LEAQ  -32(AX)(R8*1), R9
+	LEAQ  -32(AX)(R8*1), SI
+	LEAQ  -32(CX)(R8*1), R9
 
 emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_big_loop_back:
 	MOVOU (SI), X4
@@ -5968,961 +5973,962 @@ emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_big_loop_back:
 	JNA   emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_big_loop_back
 
 emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_forward_sse_loop_32:
-	MOVOU -32(CX)(R8*1), X4
-	MOVOU -16(CX)(R8*1), X5
-	MOVOA X4, -32(AX)(R8*1)
-	MOVOA X5, -16(AX)(R8*1)
+	MOVOU -32(AX)(R8*1), X4
+	MOVOU -16(AX)(R8*1), X5
+	MOVOA X4, -32(CX)(R8*1)
+	MOVOA X5, -16(CX)(R8*1)
 	ADDQ  $0x20, R8
 	CMPQ  BX, R8
 	JAE   emit_lit_memmove_long_emit_remainder_encodeBlockAsm8Blarge_forward_sse_loop_32
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(BX*1)
-	MOVOU X3, -16(AX)(BX*1)
-	MOVQ  DX, AX
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(BX*1)
+	MOVOU X3, -16(CX)(BX*1)
+	MOVQ  DX, CX
 
 emit_literal_done_emit_remainder_encodeBlockAsm8B:
-	MOVQ dst_base+0(FP), CX
-	SUBQ CX, AX
-	MOVQ AX, ret+48(FP)
+	MOVQ dst_base+0(FP), AX
+	SUBQ AX, CX
+	MOVQ CX, ret+56(FP)
 	RET
 
-// func encodeBetterBlockAsm(dst []byte, src []byte) int
+// func encodeBetterBlockAsm(dst []byte, src []byte, tmp *[589824]byte) int
 // Requires: BMI, SSE2
-TEXT ·encodeBetterBlockAsm(SB), $589848-56
-	MOVQ dst_base+0(FP), AX
-	MOVQ $0x00001200, CX
-	LEAQ 24(SP), DX
+TEXT ·encodeBetterBlockAsm(SB), $24-64
+	MOVQ tmp+48(FP), AX
+	MOVQ dst_base+0(FP), CX
+	MOVQ $0x00001200, DX
+	MOVQ AX, BX
 	PXOR X0, X0
 
 zero_loop_encodeBetterBlockAsm:
-	MOVOU X0, (DX)
-	MOVOU X0, 16(DX)
-	MOVOU X0, 32(DX)
-	MOVOU X0, 48(DX)
-	MOVOU X0, 64(DX)
-	MOVOU X0, 80(DX)
-	MOVOU X0, 96(DX)
-	MOVOU X0, 112(DX)
-	ADDQ  $0x80, DX
-	DECQ  CX
+	MOVOU X0, (BX)
+	MOVOU X0, 16(BX)
+	MOVOU X0, 32(BX)
+	MOVOU X0, 48(BX)
+	MOVOU X0, 64(BX)
+	MOVOU X0, 80(BX)
+	MOVOU X0, 96(BX)
+	MOVOU X0, 112(BX)
+	ADDQ  $0x80, BX
+	DECQ  DX
 	JNZ   zero_loop_encodeBetterBlockAsm
 	MOVL  $0x00000000, 12(SP)
-	MOVQ  src_len+32(FP), CX
-	LEAQ  -6(CX), DX
-	LEAQ  -8(CX), BX
-	MOVL  BX, 8(SP)
-	SHRQ  $0x05, CX
-	SUBL  CX, DX
-	LEAQ  (AX)(DX*1), DX
-	MOVQ  DX, (SP)
-	MOVL  $0x00000001, CX
+	MOVQ  src_len+32(FP), DX
+	LEAQ  -6(DX), BX
+	LEAQ  -8(DX), SI
+	MOVL  SI, 8(SP)
+	SHRQ  $0x05, DX
+	SUBL  DX, BX
+	LEAQ  (CX)(BX*1), BX
+	MOVQ  BX, (SP)
+	MOVL  $0x00000001, DX
 	MOVL  $0x00000000, 16(SP)
-	MOVQ  src_base+24(FP), DX
+	MOVQ  src_base+24(FP), BX
 
 search_loop_encodeBetterBlockAsm:
-	MOVL CX, BX
-	SUBL 12(SP), BX
-	SHRL $0x07, BX
-	CMPL BX, $0x63
+	MOVL DX, SI
+	SUBL 12(SP), SI
+	SHRL $0x07, SI
+	CMPL SI, $0x63
 	JBE  check_maxskip_ok_encodeBetterBlockAsm
-	LEAL 100(CX), BX
+	LEAL 100(DX), SI
 	JMP  check_maxskip_cont_encodeBetterBlockAsm
 
 check_maxskip_ok_encodeBetterBlockAsm:
-	LEAL 1(CX)(BX*1), BX
+	LEAL 1(DX)(SI*1), SI
 
 check_maxskip_cont_encodeBetterBlockAsm:
-	CMPL  BX, 8(SP)
+	CMPL  SI, 8(SP)
 	JAE   emit_remainder_encodeBetterBlockAsm
-	MOVQ  (DX)(CX*1), SI
-	MOVL  BX, 20(SP)
-	MOVQ  $0x00cf1bbcdcbfa563, R8
-	MOVQ  $0x9e3779b1, BX
-	MOVQ  SI, R9
-	MOVQ  SI, R10
-	SHLQ  $0x08, R9
-	IMULQ R8, R9
-	SHRQ  $0x2f, R9
-	SHLQ  $0x20, R10
-	IMULQ BX, R10
-	SHRQ  $0x32, R10
-	MOVL  24(SP)(R9*4), BX
-	MOVL  524312(SP)(R10*4), DI
-	MOVL  CX, 24(SP)(R9*4)
-	MOVL  CX, 524312(SP)(R10*4)
-	MOVQ  (DX)(BX*1), R9
-	MOVQ  (DX)(DI*1), R10
-	CMPQ  R9, SI
+	MOVQ  (BX)(DX*1), DI
+	MOVL  SI, 20(SP)
+	MOVQ  $0x00cf1bbcdcbfa563, R9
+	MOVQ  $0x9e3779b1, SI
+	MOVQ  DI, R10
+	MOVQ  DI, R11
+	SHLQ  $0x08, R10
+	IMULQ R9, R10
+	SHRQ  $0x2f, R10
+	SHLQ  $0x20, R11
+	IMULQ SI, R11
+	SHRQ  $0x32, R11
+	MOVL  (AX)(R10*4), SI
+	MOVL  524288(AX)(R11*4), R8
+	MOVL  DX, (AX)(R10*4)
+	MOVL  DX, 524288(AX)(R11*4)
+	MOVQ  (BX)(SI*1), R10
+	MOVQ  (BX)(R8*1), R11
+	CMPQ  R10, DI
 	JEQ   candidate_match_encodeBetterBlockAsm
-	CMPQ  R10, SI
+	CMPQ  R11, DI
 	JNE   no_short_found_encodeBetterBlockAsm
-	MOVL  DI, BX
+	MOVL  R8, SI
 	JMP   candidate_match_encodeBetterBlockAsm
 
 no_short_found_encodeBetterBlockAsm:
-	CMPL R9, SI
+	CMPL R10, DI
 	JEQ  candidate_match_encodeBetterBlockAsm
-	CMPL R10, SI
+	CMPL R11, DI
 	JEQ  candidateS_match_encodeBetterBlockAsm
-	MOVL 20(SP), CX
+	MOVL 20(SP), DX
 	JMP  search_loop_encodeBetterBlockAsm
 
 candidateS_match_encodeBetterBlockAsm:
-	SHRQ  $0x08, SI
-	MOVQ  SI, R9
-	SHLQ  $0x08, R9
-	IMULQ R8, R9
-	SHRQ  $0x2f, R9
-	MOVL  24(SP)(R9*4), BX
-	INCL  CX
-	MOVL  CX, 24(SP)(R9*4)
-	CMPL  (DX)(BX*1), SI
+	SHRQ  $0x08, DI
+	MOVQ  DI, R10
+	SHLQ  $0x08, R10
+	IMULQ R9, R10
+	SHRQ  $0x2f, R10
+	MOVL  (AX)(R10*4), SI
+	INCL  DX
+	MOVL  DX, (AX)(R10*4)
+	CMPL  (BX)(SI*1), DI
 	JEQ   candidate_match_encodeBetterBlockAsm
-	DECL  CX
-	MOVL  DI, BX
+	DECL  DX
+	MOVL  R8, SI
 
 candidate_match_encodeBetterBlockAsm:
-	MOVL  12(SP), SI
-	TESTL BX, BX
+	MOVL  12(SP), DI
+	TESTL SI, SI
 	JZ    match_extend_back_end_encodeBetterBlockAsm
 
 match_extend_back_loop_encodeBetterBlockAsm:
-	CMPL CX, SI
+	CMPL DX, DI
 	JBE  match_extend_back_end_encodeBetterBlockAsm
-	MOVB -1(DX)(BX*1), DI
-	MOVB -1(DX)(CX*1), R8
-	CMPB DI, R8
+	MOVB -1(BX)(SI*1), R8
+	MOVB -1(BX)(DX*1), R9
+	CMPB R8, R9
 	JNE  match_extend_back_end_encodeBetterBlockAsm
-	LEAL -1(CX), CX
-	DECL BX
+	LEAL -1(DX), DX
+	DECL SI
 	JZ   match_extend_back_end_encodeBetterBlockAsm
 	JMP  match_extend_back_loop_encodeBetterBlockAsm
 
 match_extend_back_end_encodeBetterBlockAsm:
-	MOVL CX, SI
-	SUBL 12(SP), SI
-	LEAQ 5(AX)(SI*1), SI
-	CMPQ SI, (SP)
+	MOVL DX, DI
+	SUBL 12(SP), DI
+	LEAQ 5(CX)(DI*1), DI
+	CMPQ DI, (SP)
 	JB   match_dst_size_check_encodeBetterBlockAsm
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 match_dst_size_check_encodeBetterBlockAsm:
-	MOVL CX, SI
-	ADDL $0x04, CX
-	ADDL $0x04, BX
-	MOVQ src_len+32(FP), DI
-	SUBL CX, DI
-	LEAQ (DX)(CX*1), R8
-	LEAQ (DX)(BX*1), R9
+	MOVL DX, DI
+	ADDL $0x04, DX
+	ADDL $0x04, SI
+	MOVQ src_len+32(FP), R8
+	SUBL DX, R8
+	LEAQ (BX)(DX*1), R9
+	LEAQ (BX)(SI*1), R10
 
 	// matchLen
-	XORL R11, R11
+	XORL R12, R12
 
 matchlen_loopback_16_match_nolit_encodeBetterBlockAsm:
-	CMPL DI, $0x10
+	CMPL R8, $0x10
 	JB   matchlen_match8_match_nolit_encodeBetterBlockAsm
-	MOVQ (R8)(R11*1), R10
-	MOVQ 8(R8)(R11*1), R12
-	XORQ (R9)(R11*1), R10
+	MOVQ (R9)(R12*1), R11
+	MOVQ 8(R9)(R12*1), R13
+	XORQ (R10)(R12*1), R11
 	JNZ  matchlen_bsf_8_match_nolit_encodeBetterBlockAsm
-	XORQ 8(R9)(R11*1), R12
+	XORQ 8(R10)(R12*1), R13
 	JNZ  matchlen_bsf_16match_nolit_encodeBetterBlockAsm
-	LEAL -16(DI), DI
-	LEAL 16(R11), R11
+	LEAL -16(R8), R8
+	LEAL 16(R12), R12
 	JMP  matchlen_loopback_16_match_nolit_encodeBetterBlockAsm
 
 matchlen_bsf_16match_nolit_encodeBetterBlockAsm:
 #ifdef GOAMD64_v3
-	TZCNTQ R12, R12
+	TZCNTQ R13, R13
 
 #else
-	BSFQ R12, R12
+	BSFQ R13, R13
 
 #endif
-	SARQ $0x03, R12
-	LEAL 8(R11)(R12*1), R11
+	SARQ $0x03, R13
+	LEAL 8(R12)(R13*1), R12
 	JMP  match_nolit_end_encodeBetterBlockAsm
 
 matchlen_match8_match_nolit_encodeBetterBlockAsm:
-	CMPL DI, $0x08
+	CMPL R8, $0x08
 	JB   matchlen_match4_match_nolit_encodeBetterBlockAsm
-	MOVQ (R8)(R11*1), R10
-	XORQ (R9)(R11*1), R10
+	MOVQ (R9)(R12*1), R11
+	XORQ (R10)(R12*1), R11
 	JNZ  matchlen_bsf_8_match_nolit_encodeBetterBlockAsm
-	LEAL -8(DI), DI
-	LEAL 8(R11), R11
+	LEAL -8(R8), R8
+	LEAL 8(R12), R12
 	JMP  matchlen_match4_match_nolit_encodeBetterBlockAsm
 
 matchlen_bsf_8_match_nolit_encodeBetterBlockAsm:
 #ifdef GOAMD64_v3
-	TZCNTQ R10, R10
+	TZCNTQ R11, R11
 
 #else
-	BSFQ R10, R10
+	BSFQ R11, R11
 
 #endif
-	SARQ $0x03, R10
-	LEAL (R11)(R10*1), R11
+	SARQ $0x03, R11
+	LEAL (R12)(R11*1), R12
 	JMP  match_nolit_end_encodeBetterBlockAsm
 
 matchlen_match4_match_nolit_encodeBetterBlockAsm:
-	CMPL DI, $0x04
+	CMPL R8, $0x04
 	JB   matchlen_match2_match_nolit_encodeBetterBlockAsm
-	MOVL (R8)(R11*1), R10
-	CMPL (R9)(R11*1), R10
+	MOVL (R9)(R12*1), R11
+	CMPL (R10)(R12*1), R11
 	JNE  matchlen_match2_match_nolit_encodeBetterBlockAsm
-	LEAL -4(DI), DI
-	LEAL 4(R11), R11
+	LEAL -4(R8), R8
+	LEAL 4(R12), R12
 
 matchlen_match2_match_nolit_encodeBetterBlockAsm:
-	CMPL DI, $0x01
+	CMPL R8, $0x01
 	JE   matchlen_match1_match_nolit_encodeBetterBlockAsm
 	JB   match_nolit_end_encodeBetterBlockAsm
-	MOVW (R8)(R11*1), R10
-	CMPW (R9)(R11*1), R10
+	MOVW (R9)(R12*1), R11
+	CMPW (R10)(R12*1), R11
 	JNE  matchlen_match1_match_nolit_encodeBetterBlockAsm
-	LEAL 2(R11), R11
-	SUBL $0x02, DI
+	LEAL 2(R12), R12
+	SUBL $0x02, R8
 	JZ   match_nolit_end_encodeBetterBlockAsm
 
 matchlen_match1_match_nolit_encodeBetterBlockAsm:
-	MOVB (R8)(R11*1), R10
-	CMPB (R9)(R11*1), R10
+	MOVB (R9)(R12*1), R11
+	CMPB (R10)(R12*1), R11
 	JNE  match_nolit_end_encodeBetterBlockAsm
-	LEAL 1(R11), R11
+	LEAL 1(R12), R12
 
 match_nolit_end_encodeBetterBlockAsm:
-	MOVL CX, DI
-	SUBL BX, DI
+	MOVL DX, R8
+	SUBL SI, R8
 
 	// Check if repeat
-	CMPL 16(SP), DI
+	CMPL 16(SP), R8
 	JEQ  match_is_repeat_encodeBetterBlockAsm
-	CMPL R11, $0x01
+	CMPL R12, $0x01
 	JA   match_length_ok_encodeBetterBlockAsm
-	CMPL DI, $0x0000ffff
+	CMPL R8, $0x0000ffff
 	JBE  match_length_ok_encodeBetterBlockAsm
-	MOVL 20(SP), CX
-	INCL CX
+	MOVL 20(SP), DX
+	INCL DX
 	JMP  search_loop_encodeBetterBlockAsm
 
 match_length_ok_encodeBetterBlockAsm:
-	MOVL DI, 16(SP)
-	MOVL 12(SP), BX
-	CMPL BX, SI
+	MOVL R8, 16(SP)
+	MOVL 12(SP), SI
+	CMPL SI, DI
 	JEQ  emit_literal_done_match_emit_encodeBetterBlockAsm
-	MOVL SI, R8
-	MOVL SI, 12(SP)
-	LEAQ (DX)(BX*1), R9
-	SUBL BX, R8
-	LEAL -1(R8), BX
-	CMPL BX, $0x3c
+	MOVL DI, R9
+	MOVL DI, 12(SP)
+	LEAQ (BX)(SI*1), R10
+	SUBL SI, R9
+	LEAL -1(R9), SI
+	CMPL SI, $0x3c
 	JB   one_byte_match_emit_encodeBetterBlockAsm
-	CMPL BX, $0x00000100
+	CMPL SI, $0x00000100
 	JB   two_bytes_match_emit_encodeBetterBlockAsm
-	CMPL BX, $0x00010000
+	CMPL SI, $0x00010000
 	JB   three_bytes_match_emit_encodeBetterBlockAsm
-	CMPL BX, $0x01000000
+	CMPL SI, $0x01000000
 	JB   four_bytes_match_emit_encodeBetterBlockAsm
-	MOVB $0xfc, (AX)
-	MOVL BX, 1(AX)
-	ADDQ $0x05, AX
+	MOVB $0xfc, (CX)
+	MOVL SI, 1(CX)
+	ADDQ $0x05, CX
 	JMP  memmove_long_match_emit_encodeBetterBlockAsm
 
 four_bytes_match_emit_encodeBetterBlockAsm:
-	MOVL BX, R10
-	SHRL $0x10, R10
-	MOVB $0xf8, (AX)
-	MOVW BX, 1(AX)
-	MOVB R10, 3(AX)
-	ADDQ $0x04, AX
+	MOVL SI, R11
+	SHRL $0x10, R11
+	MOVB $0xf8, (CX)
+	MOVW SI, 1(CX)
+	MOVB R11, 3(CX)
+	ADDQ $0x04, CX
 	JMP  memmove_long_match_emit_encodeBetterBlockAsm
 
 three_bytes_match_emit_encodeBetterBlockAsm:
-	MOVB $0xf4, (AX)
-	MOVW BX, 1(AX)
-	ADDQ $0x03, AX
+	MOVB $0xf4, (CX)
+	MOVW SI, 1(CX)
+	ADDQ $0x03, CX
 	JMP  memmove_long_match_emit_encodeBetterBlockAsm
 
 two_bytes_match_emit_encodeBetterBlockAsm:
-	MOVB $0xf0, (AX)
-	MOVB BL, 1(AX)
-	ADDQ $0x02, AX
-	CMPL BX, $0x40
+	MOVB $0xf0, (CX)
+	MOVB SI, 1(CX)
+	ADDQ $0x02, CX
+	CMPL SI, $0x40
 	JB   memmove_match_emit_encodeBetterBlockAsm
 	JMP  memmove_long_match_emit_encodeBetterBlockAsm
 
 one_byte_match_emit_encodeBetterBlockAsm:
-	SHLB $0x02, BL
-	MOVB BL, (AX)
-	ADDQ $0x01, AX
+	SHLB $0x02, SI
+	MOVB SI, (CX)
+	ADDQ $0x01, CX
 
 memmove_match_emit_encodeBetterBlockAsm:
-	LEAQ (AX)(R8*1), BX
+	LEAQ (CX)(R9*1), SI
 
 	// genMemMoveShort
-	CMPQ R8, $0x04
+	CMPQ R9, $0x04
 	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4
-	CMPQ R8, $0x08
+	CMPQ R9, $0x08
 	JB   emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4through7
-	CMPQ R8, $0x10
+	CMPQ R9, $0x10
 	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_8through16
-	CMPQ R8, $0x20
+	CMPQ R9, $0x20
 	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_17through32
 	JMP  emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_33through64
 
 emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4:
-	MOVL (R9), R10
-	MOVL R10, (AX)
+	MOVL (R10), R11
+	MOVL R11, (CX)
 	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm
 
 emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_4through7:
-	MOVL (R9), R10
-	MOVL -4(R9)(R8*1), R9
-	MOVL R10, (AX)
-	MOVL R9, -4(AX)(R8*1)
+	MOVL (R10), R11
+	MOVL -4(R10)(R9*1), R10
+	MOVL R11, (CX)
+	MOVL R10, -4(CX)(R9*1)
 	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm
 
 emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_8through16:
-	MOVQ (R9), R10
-	MOVQ -8(R9)(R8*1), R9
-	MOVQ R10, (AX)
-	MOVQ R9, -8(AX)(R8*1)
+	MOVQ (R10), R11
+	MOVQ -8(R10)(R9*1), R10
+	MOVQ R11, (CX)
+	MOVQ R10, -8(CX)(R9*1)
 	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm
 
 emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_17through32:
-	MOVOU (R9), X0
-	MOVOU -16(R9)(R8*1), X1
-	MOVOU X0, (AX)
-	MOVOU X1, -16(AX)(R8*1)
+	MOVOU (R10), X0
+	MOVOU -16(R10)(R9*1), X1
+	MOVOU X0, (CX)
+	MOVOU X1, -16(CX)(R9*1)
 	JMP   memmove_end_copy_match_emit_encodeBetterBlockAsm
 
 emit_lit_memmove_match_emit_encodeBetterBlockAsm_memmove_move_33through64:
-	MOVOU (R9), X0
-	MOVOU 16(R9), X1
-	MOVOU -32(R9)(R8*1), X2
-	MOVOU -16(R9)(R8*1), X3
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(R8*1)
-	MOVOU X3, -16(AX)(R8*1)
+	MOVOU (R10), X0
+	MOVOU 16(R10), X1
+	MOVOU -32(R10)(R9*1), X2
+	MOVOU -16(R10)(R9*1), X3
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R9*1)
+	MOVOU X3, -16(CX)(R9*1)
 
 memmove_end_copy_match_emit_encodeBetterBlockAsm:
-	MOVQ BX, AX
+	MOVQ SI, CX
 	JMP  emit_literal_done_match_emit_encodeBetterBlockAsm
 
 memmove_long_match_emit_encodeBetterBlockAsm:
-	LEAQ (AX)(R8*1), BX
+	LEAQ (CX)(R9*1), SI
 
 	// genMemMoveLong
-	MOVOU (R9), X0
-	MOVOU 16(R9), X1
-	MOVOU -32(R9)(R8*1), X2
-	MOVOU -16(R9)(R8*1), X3
-	MOVQ  R8, R12
-	SHRQ  $0x05, R12
-	MOVQ  AX, R10
-	ANDL  $0x0000001f, R10
-	MOVQ  $0x00000040, R13
-	SUBQ  R10, R13
-	DECQ  R12
+	MOVOU (R10), X0
+	MOVOU 16(R10), X1
+	MOVOU -32(R10)(R9*1), X2
+	MOVOU -16(R10)(R9*1), X3
+	MOVQ  R9, R13
+	SHRQ  $0x05, R13
+	MOVQ  CX, R11
+	ANDL  $0x0000001f, R11
+	MOVQ  $0x00000040, R14
+	SUBQ  R11, R14
+	DECQ  R13
 	JA    emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32
-	LEAQ  -32(R9)(R13*1), R10
-	LEAQ  -32(AX)(R13*1), R14
+	LEAQ  -32(R10)(R14*1), R11
+	LEAQ  -32(CX)(R14*1), R15
 
 emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_big_loop_back:
-	MOVOU (R10), X4
-	MOVOU 16(R10), X5
-	MOVOA X4, (R14)
-	MOVOA X5, 16(R14)
+	MOVOU (R11), X4
+	MOVOU 16(R11), X5
+	MOVOA X4, (R15)
+	MOVOA X5, 16(R15)
+	ADDQ  $0x20, R15
+	ADDQ  $0x20, R11
 	ADDQ  $0x20, R14
-	ADDQ  $0x20, R10
-	ADDQ  $0x20, R13
-	DECQ  R12
+	DECQ  R13
 	JNA   emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_big_loop_back
 
 emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32:
-	MOVOU -32(R9)(R13*1), X4
-	MOVOU -16(R9)(R13*1), X5
-	MOVOA X4, -32(AX)(R13*1)
-	MOVOA X5, -16(AX)(R13*1)
-	ADDQ  $0x20, R13
-	CMPQ  R8, R13
+	MOVOU -32(R10)(R14*1), X4
+	MOVOU -16(R10)(R14*1), X5
+	MOVOA X4, -32(CX)(R14*1)
+	MOVOA X5, -16(CX)(R14*1)
+	ADDQ  $0x20, R14
+	CMPQ  R9, R14
 	JAE   emit_lit_memmove_long_match_emit_encodeBetterBlockAsmlarge_forward_sse_loop_32
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(R8*1)
-	MOVOU X3, -16(AX)(R8*1)
-	MOVQ  BX, AX
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R9*1)
+	MOVOU X3, -16(CX)(R9*1)
+	MOVQ  SI, CX
 
 emit_literal_done_match_emit_encodeBetterBlockAsm:
-	ADDL R11, CX
-	ADDL $0x04, R11
-	MOVL CX, 12(SP)
+	ADDL R12, DX
+	ADDL $0x04, R12
+	MOVL DX, 12(SP)
 
 	// emitCopy
-	CMPL DI, $0x00010000
+	CMPL R8, $0x00010000
 	JB   two_byte_offset_match_nolit_encodeBetterBlockAsm
-	CMPL R11, $0x40
+	CMPL R12, $0x40
 	JBE  four_bytes_remain_match_nolit_encodeBetterBlockAsm
-	MOVB $0xff, (AX)
-	MOVL DI, 1(AX)
-	LEAL -64(R11), R11
-	ADDQ $0x05, AX
-	CMPL R11, $0x04
+	MOVB $0xff, (CX)
+	MOVL R8, 1(CX)
+	LEAL -64(R12), R12
+	ADDQ $0x05, CX
+	CMPL R12, $0x04
 	JB   four_bytes_remain_match_nolit_encodeBetterBlockAsm
 
 	// emitRepeat
 emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy:
-	MOVL R11, BX
-	LEAL -4(R11), R11
-	CMPL BX, $0x08
+	MOVL R12, SI
+	LEAL -4(R12), R12
+	CMPL SI, $0x08
 	JBE  repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy
-	CMPL BX, $0x0c
+	CMPL SI, $0x0c
 	JAE  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy
-	CMPL DI, $0x00000800
+	CMPL R8, $0x00000800
 	JB   repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy
 
 cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy:
-	CMPL R11, $0x00000104
+	CMPL R12, $0x00000104
 	JB   repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy
-	CMPL R11, $0x00010100
+	CMPL R12, $0x00010100
 	JB   repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy
-	CMPL R11, $0x0100ffff
+	CMPL R12, $0x0100ffff
 	JB   repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy
-	LEAL -16842747(R11), R11
-	MOVL $0xfffb001d, (AX)
-	MOVB $0xff, 4(AX)
-	ADDQ $0x05, AX
+	LEAL -16842747(R12), R12
+	MOVL $0xfffb001d, (CX)
+	MOVB $0xff, 4(CX)
+	ADDQ $0x05, CX
 	JMP  emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy
 
 repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy:
-	LEAL -65536(R11), R11
-	MOVL R11, DI
-	MOVW $0x001d, (AX)
-	MOVW R11, 2(AX)
-	SARL $0x10, DI
-	MOVB DI, 4(AX)
-	ADDQ $0x05, AX
+	LEAL -65536(R12), R12
+	MOVL R12, R8
+	MOVW $0x001d, (CX)
+	MOVW R12, 2(CX)
+	SARL $0x10, R8
+	MOVB R8, 4(CX)
+	ADDQ $0x05, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
 
 repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy:
-	LEAL -256(R11), R11
-	MOVW $0x0019, (AX)
-	MOVW R11, 2(AX)
-	ADDQ $0x04, AX
+	LEAL -256(R12), R12
+	MOVW $0x0019, (CX)
+	MOVW R12, 2(CX)
+	ADDQ $0x04, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
 
 repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy:
-	LEAL -4(R11), R11
-	MOVW $0x0015, (AX)
-	MOVB R11, 2(AX)
-	ADDQ $0x03, AX
+	LEAL -4(R12), R12
+	MOVW $0x0015, (CX)
+	MOVB R12, 2(CX)
+	ADDQ $0x03, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
 
 repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy:
-	SHLL $0x02, R11
-	ORL  $0x01, R11
-	MOVW R11, (AX)
-	ADDQ $0x02, AX
+	SHLL $0x02, R12
+	ORL  $0x01, R12
+	MOVW R12, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
 
 repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy:
-	XORQ BX, BX
-	LEAL 1(BX)(R11*4), R11
-	MOVB DI, 1(AX)
-	SARL $0x08, DI
-	SHLL $0x05, DI
-	ORL  DI, R11
-	MOVB R11, (AX)
-	ADDQ $0x02, AX
+	XORQ SI, SI
+	LEAL 1(SI)(R12*4), R12
+	MOVB R8, 1(CX)
+	SARL $0x08, R8
+	SHLL $0x05, R8
+	ORL  R8, R12
+	MOVB R12, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
 
 four_bytes_remain_match_nolit_encodeBetterBlockAsm:
-	TESTL R11, R11
+	TESTL R12, R12
 	JZ    match_nolit_emitcopy_end_encodeBetterBlockAsm
-	XORL  BX, BX
-	LEAL  -1(BX)(R11*4), R11
-	MOVB  R11, (AX)
-	MOVL  DI, 1(AX)
-	ADDQ  $0x05, AX
+	XORL  SI, SI
+	LEAL  -1(SI)(R12*4), R12
+	MOVB  R12, (CX)
+	MOVL  R8, 1(CX)
+	ADDQ  $0x05, CX
 	JMP   match_nolit_emitcopy_end_encodeBetterBlockAsm
 
 two_byte_offset_match_nolit_encodeBetterBlockAsm:
-	CMPL R11, $0x40
+	CMPL R12, $0x40
 	JBE  two_byte_offset_short_match_nolit_encodeBetterBlockAsm
-	CMPL DI, $0x00000800
+	CMPL R8, $0x00000800
 	JAE  long_offset_short_match_nolit_encodeBetterBlockAsm
-	MOVL $0x00000001, BX
-	LEAL 16(BX), BX
-	MOVB DI, 1(AX)
-	MOVL DI, R8
-	SHRL $0x08, R8
-	SHLL $0x05, R8
-	ORL  R8, BX
-	MOVB BL, (AX)
-	ADDQ $0x02, AX
-	SUBL $0x08, R11
+	MOVL $0x00000001, SI
+	LEAL 16(SI), SI
+	MOVB R8, 1(CX)
+	MOVL R8, R9
+	SHRL $0x08, R9
+	SHLL $0x05, R9
+	ORL  R9, SI
+	MOVB SI, (CX)
+	ADDQ $0x02, CX
+	SUBL $0x08, R12
 
 	// emitRepeat
-	LEAL -4(R11), R11
+	LEAL -4(R12), R12
 	JMP  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b
 
 emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b:
-	MOVL R11, BX
-	LEAL -4(R11), R11
-	CMPL BX, $0x08
+	MOVL R12, SI
+	LEAL -4(R12), R12
+	CMPL SI, $0x08
 	JBE  repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b
-	CMPL BX, $0x0c
+	CMPL SI, $0x0c
 	JAE  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b
-	CMPL DI, $0x00000800
+	CMPL R8, $0x00000800
 	JB   repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b
 
 cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b:
-	CMPL R11, $0x00000104
+	CMPL R12, $0x00000104
 	JB   repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b
-	CMPL R11, $0x00010100
+	CMPL R12, $0x00010100
 	JB   repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b
-	CMPL R11, $0x0100ffff
+	CMPL R12, $0x0100ffff
 	JB   repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b
-	LEAL -16842747(R11), R11
-	MOVL $0xfffb001d, (AX)
-	MOVB $0xff, 4(AX)
-	ADDQ $0x05, AX
+	LEAL -16842747(R12), R12
+	MOVL $0xfffb001d, (CX)
+	MOVB $0xff, 4(CX)
+	ADDQ $0x05, CX
 	JMP  emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b
 
 repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b:
-	LEAL -65536(R11), R11
-	MOVL R11, DI
-	MOVW $0x001d, (AX)
-	MOVW R11, 2(AX)
-	SARL $0x10, DI
-	MOVB DI, 4(AX)
-	ADDQ $0x05, AX
+	LEAL -65536(R12), R12
+	MOVL R12, R8
+	MOVW $0x001d, (CX)
+	MOVW R12, 2(CX)
+	SARL $0x10, R8
+	MOVB R8, 4(CX)
+	ADDQ $0x05, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
 
 repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b:
-	LEAL -256(R11), R11
-	MOVW $0x0019, (AX)
-	MOVW R11, 2(AX)
-	ADDQ $0x04, AX
+	LEAL -256(R12), R12
+	MOVW $0x0019, (CX)
+	MOVW R12, 2(CX)
+	ADDQ $0x04, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
 
 repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b:
-	LEAL -4(R11), R11
-	MOVW $0x0015, (AX)
-	MOVB R11, 2(AX)
-	ADDQ $0x03, AX
+	LEAL -4(R12), R12
+	MOVW $0x0015, (CX)
+	MOVB R12, 2(CX)
+	ADDQ $0x03, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
 
 repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b:
-	SHLL $0x02, R11
-	ORL  $0x01, R11
-	MOVW R11, (AX)
-	ADDQ $0x02, AX
+	SHLL $0x02, R12
+	ORL  $0x01, R12
+	MOVW R12, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
 
 repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short_2b:
-	XORQ BX, BX
-	LEAL 1(BX)(R11*4), R11
-	MOVB DI, 1(AX)
-	SARL $0x08, DI
-	SHLL $0x05, DI
-	ORL  DI, R11
-	MOVB R11, (AX)
-	ADDQ $0x02, AX
+	XORQ SI, SI
+	LEAL 1(SI)(R12*4), R12
+	MOVB R8, 1(CX)
+	SARL $0x08, R8
+	SHLL $0x05, R8
+	ORL  R8, R12
+	MOVB R12, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
 
 long_offset_short_match_nolit_encodeBetterBlockAsm:
-	MOVB $0xee, (AX)
-	MOVW DI, 1(AX)
-	LEAL -60(R11), R11
-	ADDQ $0x03, AX
+	MOVB $0xee, (CX)
+	MOVW R8, 1(CX)
+	LEAL -60(R12), R12
+	ADDQ $0x03, CX
 
 	// emitRepeat
 emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short:
-	MOVL R11, BX
-	LEAL -4(R11), R11
-	CMPL BX, $0x08
+	MOVL R12, SI
+	LEAL -4(R12), R12
+	CMPL SI, $0x08
 	JBE  repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short
-	CMPL BX, $0x0c
+	CMPL SI, $0x0c
 	JAE  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short
-	CMPL DI, $0x00000800
+	CMPL R8, $0x00000800
 	JB   repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short
 
 cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short:
-	CMPL R11, $0x00000104
+	CMPL R12, $0x00000104
 	JB   repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short
-	CMPL R11, $0x00010100
+	CMPL R12, $0x00010100
 	JB   repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short
-	CMPL R11, $0x0100ffff
+	CMPL R12, $0x0100ffff
 	JB   repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short
-	LEAL -16842747(R11), R11
-	MOVL $0xfffb001d, (AX)
-	MOVB $0xff, 4(AX)
-	ADDQ $0x05, AX
+	LEAL -16842747(R12), R12
+	MOVL $0xfffb001d, (CX)
+	MOVB $0xff, 4(CX)
+	ADDQ $0x05, CX
 	JMP  emit_repeat_again_match_nolit_encodeBetterBlockAsm_emit_copy_short
 
 repeat_five_match_nolit_encodeBetterBlockAsm_emit_copy_short:
-	LEAL -65536(R11), R11
-	MOVL R11, DI
-	MOVW $0x001d, (AX)
-	MOVW R11, 2(AX)
-	SARL $0x10, DI
-	MOVB DI, 4(AX)
-	ADDQ $0x05, AX
+	LEAL -65536(R12), R12
+	MOVL R12, R8
+	MOVW $0x001d, (CX)
+	MOVW R12, 2(CX)
+	SARL $0x10, R8
+	MOVB R8, 4(CX)
+	ADDQ $0x05, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
 
 repeat_four_match_nolit_encodeBetterBlockAsm_emit_copy_short:
-	LEAL -256(R11), R11
-	MOVW $0x0019, (AX)
-	MOVW R11, 2(AX)
-	ADDQ $0x04, AX
+	LEAL -256(R12), R12
+	MOVW $0x0019, (CX)
+	MOVW R12, 2(CX)
+	ADDQ $0x04, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
 
 repeat_three_match_nolit_encodeBetterBlockAsm_emit_copy_short:
-	LEAL -4(R11), R11
-	MOVW $0x0015, (AX)
-	MOVB R11, 2(AX)
-	ADDQ $0x03, AX
+	LEAL -4(R12), R12
+	MOVW $0x0015, (CX)
+	MOVB R12, 2(CX)
+	ADDQ $0x03, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
 
 repeat_two_match_nolit_encodeBetterBlockAsm_emit_copy_short:
-	SHLL $0x02, R11
-	ORL  $0x01, R11
-	MOVW R11, (AX)
-	ADDQ $0x02, AX
+	SHLL $0x02, R12
+	ORL  $0x01, R12
+	MOVW R12, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
 
 repeat_two_offset_match_nolit_encodeBetterBlockAsm_emit_copy_short:
-	XORQ BX, BX
-	LEAL 1(BX)(R11*4), R11
-	MOVB DI, 1(AX)
-	SARL $0x08, DI
-	SHLL $0x05, DI
-	ORL  DI, R11
-	MOVB R11, (AX)
-	ADDQ $0x02, AX
+	XORQ SI, SI
+	LEAL 1(SI)(R12*4), R12
+	MOVB R8, 1(CX)
+	SARL $0x08, R8
+	SHLL $0x05, R8
+	ORL  R8, R12
+	MOVB R12, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
 
 two_byte_offset_short_match_nolit_encodeBetterBlockAsm:
-	MOVL R11, BX
-	SHLL $0x02, BX
-	CMPL R11, $0x0c
+	MOVL R12, SI
+	SHLL $0x02, SI
+	CMPL R12, $0x0c
 	JAE  emit_copy_three_match_nolit_encodeBetterBlockAsm
-	CMPL DI, $0x00000800
+	CMPL R8, $0x00000800
 	JAE  emit_copy_three_match_nolit_encodeBetterBlockAsm
-	LEAL -15(BX), BX
-	MOVB DI, 1(AX)
-	SHRL $0x08, DI
-	SHLL $0x05, DI
-	ORL  DI, BX
-	MOVB BL, (AX)
-	ADDQ $0x02, AX
+	LEAL -15(SI), SI
+	MOVB R8, 1(CX)
+	SHRL $0x08, R8
+	SHLL $0x05, R8
+	ORL  R8, SI
+	MOVB SI, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
 
 emit_copy_three_match_nolit_encodeBetterBlockAsm:
-	LEAL -2(BX), BX
-	MOVB BL, (AX)
-	MOVW DI, 1(AX)
-	ADDQ $0x03, AX
+	LEAL -2(SI), SI
+	MOVB SI, (CX)
+	MOVW R8, 1(CX)
+	ADDQ $0x03, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
 
 match_is_repeat_encodeBetterBlockAsm:
-	MOVL 12(SP), BX
-	CMPL BX, SI
+	MOVL 12(SP), SI
+	CMPL SI, DI
 	JEQ  emit_literal_done_match_emit_repeat_encodeBetterBlockAsm
-	MOVL SI, R8
-	MOVL SI, 12(SP)
-	LEAQ (DX)(BX*1), R9
-	SUBL BX, R8
-	LEAL -1(R8), BX
-	CMPL BX, $0x3c
+	MOVL DI, R9
+	MOVL DI, 12(SP)
+	LEAQ (BX)(SI*1), R10
+	SUBL SI, R9
+	LEAL -1(R9), SI
+	CMPL SI, $0x3c
 	JB   one_byte_match_emit_repeat_encodeBetterBlockAsm
-	CMPL BX, $0x00000100
+	CMPL SI, $0x00000100
 	JB   two_bytes_match_emit_repeat_encodeBetterBlockAsm
-	CMPL BX, $0x00010000
+	CMPL SI, $0x00010000
 	JB   three_bytes_match_emit_repeat_encodeBetterBlockAsm
-	CMPL BX, $0x01000000
+	CMPL SI, $0x01000000
 	JB   four_bytes_match_emit_repeat_encodeBetterBlockAsm
-	MOVB $0xfc, (AX)
-	MOVL BX, 1(AX)
-	ADDQ $0x05, AX
+	MOVB $0xfc, (CX)
+	MOVL SI, 1(CX)
+	ADDQ $0x05, CX
 	JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm
 
 four_bytes_match_emit_repeat_encodeBetterBlockAsm:
-	MOVL BX, R10
-	SHRL $0x10, R10
-	MOVB $0xf8, (AX)
-	MOVW BX, 1(AX)
-	MOVB R10, 3(AX)
-	ADDQ $0x04, AX
+	MOVL SI, R11
+	SHRL $0x10, R11
+	MOVB $0xf8, (CX)
+	MOVW SI, 1(CX)
+	MOVB R11, 3(CX)
+	ADDQ $0x04, CX
 	JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm
 
 three_bytes_match_emit_repeat_encodeBetterBlockAsm:
-	MOVB $0xf4, (AX)
-	MOVW BX, 1(AX)
-	ADDQ $0x03, AX
+	MOVB $0xf4, (CX)
+	MOVW SI, 1(CX)
+	ADDQ $0x03, CX
 	JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm
 
 two_bytes_match_emit_repeat_encodeBetterBlockAsm:
-	MOVB $0xf0, (AX)
-	MOVB BL, 1(AX)
-	ADDQ $0x02, AX
-	CMPL BX, $0x40
+	MOVB $0xf0, (CX)
+	MOVB SI, 1(CX)
+	ADDQ $0x02, CX
+	CMPL SI, $0x40
 	JB   memmove_match_emit_repeat_encodeBetterBlockAsm
 	JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm
 
 one_byte_match_emit_repeat_encodeBetterBlockAsm:
-	SHLB $0x02, BL
-	MOVB BL, (AX)
-	ADDQ $0x01, AX
+	SHLB $0x02, SI
+	MOVB SI, (CX)
+	ADDQ $0x01, CX
 
 memmove_match_emit_repeat_encodeBetterBlockAsm:
-	LEAQ (AX)(R8*1), BX
+	LEAQ (CX)(R9*1), SI
 
 	// genMemMoveShort
-	CMPQ R8, $0x04
+	CMPQ R9, $0x04
 	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4
-	CMPQ R8, $0x08
+	CMPQ R9, $0x08
 	JB   emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4through7
-	CMPQ R8, $0x10
+	CMPQ R9, $0x10
 	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_8through16
-	CMPQ R8, $0x20
+	CMPQ R9, $0x20
 	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_17through32
 	JMP  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_33through64
 
 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4:
-	MOVL (R9), R10
-	MOVL R10, (AX)
+	MOVL (R10), R11
+	MOVL R11, (CX)
 	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm
 
 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_4through7:
-	MOVL (R9), R10
-	MOVL -4(R9)(R8*1), R9
-	MOVL R10, (AX)
-	MOVL R9, -4(AX)(R8*1)
+	MOVL (R10), R11
+	MOVL -4(R10)(R9*1), R10
+	MOVL R11, (CX)
+	MOVL R10, -4(CX)(R9*1)
 	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm
 
 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_8through16:
-	MOVQ (R9), R10
-	MOVQ -8(R9)(R8*1), R9
-	MOVQ R10, (AX)
-	MOVQ R9, -8(AX)(R8*1)
+	MOVQ (R10), R11
+	MOVQ -8(R10)(R9*1), R10
+	MOVQ R11, (CX)
+	MOVQ R10, -8(CX)(R9*1)
 	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm
 
 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_17through32:
-	MOVOU (R9), X0
-	MOVOU -16(R9)(R8*1), X1
-	MOVOU X0, (AX)
-	MOVOU X1, -16(AX)(R8*1)
+	MOVOU (R10), X0
+	MOVOU -16(R10)(R9*1), X1
+	MOVOU X0, (CX)
+	MOVOU X1, -16(CX)(R9*1)
 	JMP   memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm
 
 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm_memmove_move_33through64:
-	MOVOU (R9), X0
-	MOVOU 16(R9), X1
-	MOVOU -32(R9)(R8*1), X2
-	MOVOU -16(R9)(R8*1), X3
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(R8*1)
-	MOVOU X3, -16(AX)(R8*1)
+	MOVOU (R10), X0
+	MOVOU 16(R10), X1
+	MOVOU -32(R10)(R9*1), X2
+	MOVOU -16(R10)(R9*1), X3
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R9*1)
+	MOVOU X3, -16(CX)(R9*1)
 
 memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm:
-	MOVQ BX, AX
+	MOVQ SI, CX
 	JMP  emit_literal_done_match_emit_repeat_encodeBetterBlockAsm
 
 memmove_long_match_emit_repeat_encodeBetterBlockAsm:
-	LEAQ (AX)(R8*1), BX
+	LEAQ (CX)(R9*1), SI
 
 	// genMemMoveLong
-	MOVOU (R9), X0
-	MOVOU 16(R9), X1
-	MOVOU -32(R9)(R8*1), X2
-	MOVOU -16(R9)(R8*1), X3
-	MOVQ  R8, R12
-	SHRQ  $0x05, R12
-	MOVQ  AX, R10
-	ANDL  $0x0000001f, R10
-	MOVQ  $0x00000040, R13
-	SUBQ  R10, R13
-	DECQ  R12
+	MOVOU (R10), X0
+	MOVOU 16(R10), X1
+	MOVOU -32(R10)(R9*1), X2
+	MOVOU -16(R10)(R9*1), X3
+	MOVQ  R9, R13
+	SHRQ  $0x05, R13
+	MOVQ  CX, R11
+	ANDL  $0x0000001f, R11
+	MOVQ  $0x00000040, R14
+	SUBQ  R11, R14
+	DECQ  R13
 	JA    emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_forward_sse_loop_32
-	LEAQ  -32(R9)(R13*1), R10
-	LEAQ  -32(AX)(R13*1), R14
+	LEAQ  -32(R10)(R14*1), R11
+	LEAQ  -32(CX)(R14*1), R15
 
 emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_big_loop_back:
-	MOVOU (R10), X4
-	MOVOU 16(R10), X5
-	MOVOA X4, (R14)
-	MOVOA X5, 16(R14)
+	MOVOU (R11), X4
+	MOVOU 16(R11), X5
+	MOVOA X4, (R15)
+	MOVOA X5, 16(R15)
+	ADDQ  $0x20, R15
+	ADDQ  $0x20, R11
 	ADDQ  $0x20, R14
-	ADDQ  $0x20, R10
-	ADDQ  $0x20, R13
-	DECQ  R12
+	DECQ  R13
 	JNA   emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_big_loop_back
 
 emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_forward_sse_loop_32:
-	MOVOU -32(R9)(R13*1), X4
-	MOVOU -16(R9)(R13*1), X5
-	MOVOA X4, -32(AX)(R13*1)
-	MOVOA X5, -16(AX)(R13*1)
-	ADDQ  $0x20, R13
-	CMPQ  R8, R13
+	MOVOU -32(R10)(R14*1), X4
+	MOVOU -16(R10)(R14*1), X5
+	MOVOA X4, -32(CX)(R14*1)
+	MOVOA X5, -16(CX)(R14*1)
+	ADDQ  $0x20, R14
+	CMPQ  R9, R14
 	JAE   emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsmlarge_forward_sse_loop_32
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(R8*1)
-	MOVOU X3, -16(AX)(R8*1)
-	MOVQ  BX, AX
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R9*1)
+	MOVOU X3, -16(CX)(R9*1)
+	MOVQ  SI, CX
 
 emit_literal_done_match_emit_repeat_encodeBetterBlockAsm:
-	ADDL R11, CX
-	ADDL $0x04, R11
-	MOVL CX, 12(SP)
+	ADDL R12, DX
+	ADDL $0x04, R12
+	MOVL DX, 12(SP)
 
 	// emitRepeat
 emit_repeat_again_match_nolit_repeat_encodeBetterBlockAsm:
-	MOVL R11, BX
-	LEAL -4(R11), R11
-	CMPL BX, $0x08
+	MOVL R12, SI
+	LEAL -4(R12), R12
+	CMPL SI, $0x08
 	JBE  repeat_two_match_nolit_repeat_encodeBetterBlockAsm
-	CMPL BX, $0x0c
+	CMPL SI, $0x0c
 	JAE  cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm
-	CMPL DI, $0x00000800
+	CMPL R8, $0x00000800
 	JB   repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm
 
 cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm:
-	CMPL R11, $0x00000104
+	CMPL R12, $0x00000104
 	JB   repeat_three_match_nolit_repeat_encodeBetterBlockAsm
-	CMPL R11, $0x00010100
+	CMPL R12, $0x00010100
 	JB   repeat_four_match_nolit_repeat_encodeBetterBlockAsm
-	CMPL R11, $0x0100ffff
+	CMPL R12, $0x0100ffff
 	JB   repeat_five_match_nolit_repeat_encodeBetterBlockAsm
-	LEAL -16842747(R11), R11
-	MOVL $0xfffb001d, (AX)
-	MOVB $0xff, 4(AX)
-	ADDQ $0x05, AX
+	LEAL -16842747(R12), R12
+	MOVL $0xfffb001d, (CX)
+	MOVB $0xff, 4(CX)
+	ADDQ $0x05, CX
 	JMP  emit_repeat_again_match_nolit_repeat_encodeBetterBlockAsm
 
 repeat_five_match_nolit_repeat_encodeBetterBlockAsm:
-	LEAL -65536(R11), R11
-	MOVL R11, DI
-	MOVW $0x001d, (AX)
-	MOVW R11, 2(AX)
-	SARL $0x10, DI
-	MOVB DI, 4(AX)
-	ADDQ $0x05, AX
+	LEAL -65536(R12), R12
+	MOVL R12, R8
+	MOVW $0x001d, (CX)
+	MOVW R12, 2(CX)
+	SARL $0x10, R8
+	MOVB R8, 4(CX)
+	ADDQ $0x05, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
 
 repeat_four_match_nolit_repeat_encodeBetterBlockAsm:
-	LEAL -256(R11), R11
-	MOVW $0x0019, (AX)
-	MOVW R11, 2(AX)
-	ADDQ $0x04, AX
+	LEAL -256(R12), R12
+	MOVW $0x0019, (CX)
+	MOVW R12, 2(CX)
+	ADDQ $0x04, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
 
 repeat_three_match_nolit_repeat_encodeBetterBlockAsm:
-	LEAL -4(R11), R11
-	MOVW $0x0015, (AX)
-	MOVB R11, 2(AX)
-	ADDQ $0x03, AX
+	LEAL -4(R12), R12
+	MOVW $0x0015, (CX)
+	MOVB R12, 2(CX)
+	ADDQ $0x03, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
 
 repeat_two_match_nolit_repeat_encodeBetterBlockAsm:
-	SHLL $0x02, R11
-	ORL  $0x01, R11
-	MOVW R11, (AX)
-	ADDQ $0x02, AX
+	SHLL $0x02, R12
+	ORL  $0x01, R12
+	MOVW R12, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm
 
 repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm:
-	XORQ BX, BX
-	LEAL 1(BX)(R11*4), R11
-	MOVB DI, 1(AX)
-	SARL $0x08, DI
-	SHLL $0x05, DI
-	ORL  DI, R11
-	MOVB R11, (AX)
-	ADDQ $0x02, AX
+	XORQ SI, SI
+	LEAL 1(SI)(R12*4), R12
+	MOVB R8, 1(CX)
+	SARL $0x08, R8
+	SHLL $0x05, R8
+	ORL  R8, R12
+	MOVB R12, (CX)
+	ADDQ $0x02, CX
 
 match_nolit_emitcopy_end_encodeBetterBlockAsm:
-	CMPL CX, 8(SP)
+	CMPL DX, 8(SP)
 	JAE  emit_remainder_encodeBetterBlockAsm
-	CMPQ AX, (SP)
-	JB   match_nolit_dst_ok_encodeBetterBlockAsm
-	MOVQ $0x00000000, ret+48(FP)
-	RET
-
-match_nolit_dst_ok_encodeBetterBlockAsm:
-	MOVQ  $0x00cf1bbcdcbfa563, BX
-	MOVQ  $0x9e3779b1, DI
-	LEAQ  1(SI), SI
-	LEAQ  -2(CX), R8
-	MOVQ  (DX)(SI*1), R9
-	MOVQ  1(DX)(SI*1), R10
-	MOVQ  (DX)(R8*1), R11
-	MOVQ  1(DX)(R8*1), R12
-	SHLQ  $0x08, R9
-	IMULQ BX, R9
-	SHRQ  $0x2f, R9
-	SHLQ  $0x20, R10
-	IMULQ DI, R10
-	SHRQ  $0x32, R10
-	SHLQ  $0x08, R11
-	IMULQ BX, R11
-	SHRQ  $0x2f, R11
-	SHLQ  $0x20, R12
-	IMULQ DI, R12
-	SHRQ  $0x32, R12
-	LEAQ  1(SI), DI
-	LEAQ  1(R8), R13
-	MOVL  SI, 24(SP)(R9*4)
-	MOVL  R8, 24(SP)(R11*4)
-	MOVL  DI, 524312(SP)(R10*4)
-	MOVL  R13, 524312(SP)(R12*4)
-	LEAQ  1(R8)(SI*1), DI
-	SHRQ  $0x01, DI
-	ADDQ  $0x01, SI
-	SUBQ  $0x01, R8
+	CMPQ CX, (SP)
+	JB   match_nolit_dst_ok_encodeBetterBlockAsm
+	MOVQ $0x00000000, ret+56(FP)
+	RET
+
+match_nolit_dst_ok_encodeBetterBlockAsm:
+	MOVQ  $0x00cf1bbcdcbfa563, SI
+	MOVQ  $0x9e3779b1, R8
+	LEAQ  1(DI), DI
+	LEAQ  -2(DX), R9
+	MOVQ  (BX)(DI*1), R10
+	MOVQ  1(BX)(DI*1), R11
+	MOVQ  (BX)(R9*1), R12
+	MOVQ  1(BX)(R9*1), R13
+	SHLQ  $0x08, R10
+	IMULQ SI, R10
+	SHRQ  $0x2f, R10
+	SHLQ  $0x20, R11
+	IMULQ R8, R11
+	SHRQ  $0x32, R11
+	SHLQ  $0x08, R12
+	IMULQ SI, R12
+	SHRQ  $0x2f, R12
+	SHLQ  $0x20, R13
+	IMULQ R8, R13
+	SHRQ  $0x32, R13
+	LEAQ  1(DI), R8
+	LEAQ  1(R9), R14
+	MOVL  DI, (AX)(R10*4)
+	MOVL  R9, (AX)(R12*4)
+	MOVL  R8, 524288(AX)(R11*4)
+	MOVL  R14, 524288(AX)(R13*4)
+	LEAQ  1(R9)(DI*1), R8
+	SHRQ  $0x01, R8
+	ADDQ  $0x01, DI
+	SUBQ  $0x01, R9
 
 index_loop_encodeBetterBlockAsm:
-	CMPQ  DI, R8
+	CMPQ  R8, R9
 	JAE   search_loop_encodeBetterBlockAsm
-	MOVQ  (DX)(SI*1), R9
-	MOVQ  (DX)(DI*1), R10
-	SHLQ  $0x08, R9
-	IMULQ BX, R9
-	SHRQ  $0x2f, R9
+	MOVQ  (BX)(DI*1), R10
+	MOVQ  (BX)(R8*1), R11
 	SHLQ  $0x08, R10
-	IMULQ BX, R10
+	IMULQ SI, R10
 	SHRQ  $0x2f, R10
-	MOVL  SI, 24(SP)(R9*4)
-	MOVL  DI, 24(SP)(R10*4)
-	ADDQ  $0x02, SI
+	SHLQ  $0x08, R11
+	IMULQ SI, R11
+	SHRQ  $0x2f, R11
+	MOVL  DI, (AX)(R10*4)
+	MOVL  R8, (AX)(R11*4)
 	ADDQ  $0x02, DI
+	ADDQ  $0x02, R8
 	JMP   index_loop_encodeBetterBlockAsm
 
 emit_remainder_encodeBetterBlockAsm:
-	MOVQ src_len+32(FP), CX
-	SUBL 12(SP), CX
-	LEAQ 5(AX)(CX*1), CX
-	CMPQ CX, (SP)
+	MOVQ src_len+32(FP), AX
+	SUBL 12(SP), AX
+	LEAQ 5(CX)(AX*1), AX
+	CMPQ AX, (SP)
 	JB   emit_remainder_ok_encodeBetterBlockAsm
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 emit_remainder_ok_encodeBetterBlockAsm:
-	MOVQ src_len+32(FP), CX
-	MOVL 12(SP), BX
-	CMPL BX, CX
+	MOVQ src_len+32(FP), AX
+	MOVL 12(SP), DX
+	CMPL DX, AX
 	JEQ  emit_literal_done_emit_remainder_encodeBetterBlockAsm
-	MOVL CX, SI
-	MOVL CX, 12(SP)
-	LEAQ (DX)(BX*1), CX
-	SUBL BX, SI
+	MOVL AX, SI
+	MOVL AX, 12(SP)
+	LEAQ (BX)(DX*1), AX
+	SUBL DX, SI
 	LEAL -1(SI), DX
 	CMPL DX, $0x3c
 	JB   one_byte_emit_remainder_encodeBetterBlockAsm
@@ -6932,41 +6938,41 @@ emit_remainder_ok_encodeBetterBlockAsm:
 	JB   three_bytes_emit_remainder_encodeBetterBlockAsm
 	CMPL DX, $0x01000000
 	JB   four_bytes_emit_remainder_encodeBetterBlockAsm
-	MOVB $0xfc, (AX)
-	MOVL DX, 1(AX)
-	ADDQ $0x05, AX
+	MOVB $0xfc, (CX)
+	MOVL DX, 1(CX)
+	ADDQ $0x05, CX
 	JMP  memmove_long_emit_remainder_encodeBetterBlockAsm
 
 four_bytes_emit_remainder_encodeBetterBlockAsm:
 	MOVL DX, BX
 	SHRL $0x10, BX
-	MOVB $0xf8, (AX)
-	MOVW DX, 1(AX)
-	MOVB BL, 3(AX)
-	ADDQ $0x04, AX
+	MOVB $0xf8, (CX)
+	MOVW DX, 1(CX)
+	MOVB BL, 3(CX)
+	ADDQ $0x04, CX
 	JMP  memmove_long_emit_remainder_encodeBetterBlockAsm
 
 three_bytes_emit_remainder_encodeBetterBlockAsm:
-	MOVB $0xf4, (AX)
-	MOVW DX, 1(AX)
-	ADDQ $0x03, AX
+	MOVB $0xf4, (CX)
+	MOVW DX, 1(CX)
+	ADDQ $0x03, CX
 	JMP  memmove_long_emit_remainder_encodeBetterBlockAsm
 
 two_bytes_emit_remainder_encodeBetterBlockAsm:
-	MOVB $0xf0, (AX)
-	MOVB DL, 1(AX)
-	ADDQ $0x02, AX
+	MOVB $0xf0, (CX)
+	MOVB DL, 1(CX)
+	ADDQ $0x02, CX
 	CMPL DX, $0x40
 	JB   memmove_emit_remainder_encodeBetterBlockAsm
 	JMP  memmove_long_emit_remainder_encodeBetterBlockAsm
 
 one_byte_emit_remainder_encodeBetterBlockAsm:
 	SHLB $0x02, DL
-	MOVB DL, (AX)
-	ADDQ $0x01, AX
+	MOVB DL, (CX)
+	ADDQ $0x01, CX
 
 memmove_emit_remainder_encodeBetterBlockAsm:
-	LEAQ (AX)(SI*1), DX
+	LEAQ (CX)(SI*1), DX
 	MOVL SI, BX
 
 	// genMemMoveShort
@@ -6982,73 +6988,73 @@ memmove_emit_remainder_encodeBetterBlockAsm:
 	JMP  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_33through64
 
 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_1or2:
-	MOVB (CX), SI
-	MOVB -1(CX)(BX*1), CL
-	MOVB SI, (AX)
-	MOVB CL, -1(AX)(BX*1)
+	MOVB (AX), SI
+	MOVB -1(AX)(BX*1), AL
+	MOVB SI, (CX)
+	MOVB AL, -1(CX)(BX*1)
 	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm
 
 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_3:
-	MOVW (CX), SI
-	MOVB 2(CX), CL
-	MOVW SI, (AX)
-	MOVB CL, 2(AX)
+	MOVW (AX), SI
+	MOVB 2(AX), AL
+	MOVW SI, (CX)
+	MOVB AL, 2(CX)
 	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm
 
 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_4through7:
-	MOVL (CX), SI
-	MOVL -4(CX)(BX*1), CX
-	MOVL SI, (AX)
-	MOVL CX, -4(AX)(BX*1)
+	MOVL (AX), SI
+	MOVL -4(AX)(BX*1), AX
+	MOVL SI, (CX)
+	MOVL AX, -4(CX)(BX*1)
 	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm
 
 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_8through16:
-	MOVQ (CX), SI
-	MOVQ -8(CX)(BX*1), CX
-	MOVQ SI, (AX)
-	MOVQ CX, -8(AX)(BX*1)
+	MOVQ (AX), SI
+	MOVQ -8(AX)(BX*1), AX
+	MOVQ SI, (CX)
+	MOVQ AX, -8(CX)(BX*1)
 	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm
 
 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_17through32:
-	MOVOU (CX), X0
-	MOVOU -16(CX)(BX*1), X1
-	MOVOU X0, (AX)
-	MOVOU X1, -16(AX)(BX*1)
+	MOVOU (AX), X0
+	MOVOU -16(AX)(BX*1), X1
+	MOVOU X0, (CX)
+	MOVOU X1, -16(CX)(BX*1)
 	JMP   memmove_end_copy_emit_remainder_encodeBetterBlockAsm
 
 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm_memmove_move_33through64:
-	MOVOU (CX), X0
-	MOVOU 16(CX), X1
-	MOVOU -32(CX)(BX*1), X2
-	MOVOU -16(CX)(BX*1), X3
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(BX*1)
-	MOVOU X3, -16(AX)(BX*1)
+	MOVOU (AX), X0
+	MOVOU 16(AX), X1
+	MOVOU -32(AX)(BX*1), X2
+	MOVOU -16(AX)(BX*1), X3
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(BX*1)
+	MOVOU X3, -16(CX)(BX*1)
 
 memmove_end_copy_emit_remainder_encodeBetterBlockAsm:
-	MOVQ DX, AX
+	MOVQ DX, CX
 	JMP  emit_literal_done_emit_remainder_encodeBetterBlockAsm
 
 memmove_long_emit_remainder_encodeBetterBlockAsm:
-	LEAQ (AX)(SI*1), DX
+	LEAQ (CX)(SI*1), DX
 	MOVL SI, BX
 
 	// genMemMoveLong
-	MOVOU (CX), X0
-	MOVOU 16(CX), X1
-	MOVOU -32(CX)(BX*1), X2
-	MOVOU -16(CX)(BX*1), X3
+	MOVOU (AX), X0
+	MOVOU 16(AX), X1
+	MOVOU -32(AX)(BX*1), X2
+	MOVOU -16(AX)(BX*1), X3
 	MOVQ  BX, DI
 	SHRQ  $0x05, DI
-	MOVQ  AX, SI
+	MOVQ  CX, SI
 	ANDL  $0x0000001f, SI
 	MOVQ  $0x00000040, R8
 	SUBQ  SI, R8
 	DECQ  DI
 	JA    emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_forward_sse_loop_32
-	LEAQ  -32(CX)(R8*1), SI
-	LEAQ  -32(AX)(R8*1), R9
+	LEAQ  -32(AX)(R8*1), SI
+	LEAQ  -32(CX)(R8*1), R9
 
 emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_big_loop_back:
 	MOVOU (SI), X4
@@ -7062,903 +7068,904 @@ emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_big_loop_back:
 	JNA   emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_big_loop_back
 
 emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_forward_sse_loop_32:
-	MOVOU -32(CX)(R8*1), X4
-	MOVOU -16(CX)(R8*1), X5
-	MOVOA X4, -32(AX)(R8*1)
-	MOVOA X5, -16(AX)(R8*1)
+	MOVOU -32(AX)(R8*1), X4
+	MOVOU -16(AX)(R8*1), X5
+	MOVOA X4, -32(CX)(R8*1)
+	MOVOA X5, -16(CX)(R8*1)
 	ADDQ  $0x20, R8
 	CMPQ  BX, R8
 	JAE   emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsmlarge_forward_sse_loop_32
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(BX*1)
-	MOVOU X3, -16(AX)(BX*1)
-	MOVQ  DX, AX
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(BX*1)
+	MOVOU X3, -16(CX)(BX*1)
+	MOVQ  DX, CX
 
 emit_literal_done_emit_remainder_encodeBetterBlockAsm:
-	MOVQ dst_base+0(FP), CX
-	SUBQ CX, AX
-	MOVQ AX, ret+48(FP)
+	MOVQ dst_base+0(FP), AX
+	SUBQ AX, CX
+	MOVQ CX, ret+56(FP)
 	RET
 
-// func encodeBetterBlockAsm4MB(dst []byte, src []byte) int
+// func encodeBetterBlockAsm4MB(dst []byte, src []byte, tmp *[589824]byte) int
 // Requires: BMI, SSE2
-TEXT ·encodeBetterBlockAsm4MB(SB), $589848-56
-	MOVQ dst_base+0(FP), AX
-	MOVQ $0x00001200, CX
-	LEAQ 24(SP), DX
+TEXT ·encodeBetterBlockAsm4MB(SB), $24-64
+	MOVQ tmp+48(FP), AX
+	MOVQ dst_base+0(FP), CX
+	MOVQ $0x00001200, DX
+	MOVQ AX, BX
 	PXOR X0, X0
 
 zero_loop_encodeBetterBlockAsm4MB:
-	MOVOU X0, (DX)
-	MOVOU X0, 16(DX)
-	MOVOU X0, 32(DX)
-	MOVOU X0, 48(DX)
-	MOVOU X0, 64(DX)
-	MOVOU X0, 80(DX)
-	MOVOU X0, 96(DX)
-	MOVOU X0, 112(DX)
-	ADDQ  $0x80, DX
-	DECQ  CX
+	MOVOU X0, (BX)
+	MOVOU X0, 16(BX)
+	MOVOU X0, 32(BX)
+	MOVOU X0, 48(BX)
+	MOVOU X0, 64(BX)
+	MOVOU X0, 80(BX)
+	MOVOU X0, 96(BX)
+	MOVOU X0, 112(BX)
+	ADDQ  $0x80, BX
+	DECQ  DX
 	JNZ   zero_loop_encodeBetterBlockAsm4MB
 	MOVL  $0x00000000, 12(SP)
-	MOVQ  src_len+32(FP), CX
-	LEAQ  -6(CX), DX
-	LEAQ  -8(CX), BX
-	MOVL  BX, 8(SP)
-	SHRQ  $0x05, CX
-	SUBL  CX, DX
-	LEAQ  (AX)(DX*1), DX
-	MOVQ  DX, (SP)
-	MOVL  $0x00000001, CX
+	MOVQ  src_len+32(FP), DX
+	LEAQ  -6(DX), BX
+	LEAQ  -8(DX), SI
+	MOVL  SI, 8(SP)
+	SHRQ  $0x05, DX
+	SUBL  DX, BX
+	LEAQ  (CX)(BX*1), BX
+	MOVQ  BX, (SP)
+	MOVL  $0x00000001, DX
 	MOVL  $0x00000000, 16(SP)
-	MOVQ  src_base+24(FP), DX
+	MOVQ  src_base+24(FP), BX
 
 search_loop_encodeBetterBlockAsm4MB:
-	MOVL CX, BX
-	SUBL 12(SP), BX
-	SHRL $0x07, BX
-	CMPL BX, $0x63
+	MOVL DX, SI
+	SUBL 12(SP), SI
+	SHRL $0x07, SI
+	CMPL SI, $0x63
 	JBE  check_maxskip_ok_encodeBetterBlockAsm4MB
-	LEAL 100(CX), BX
+	LEAL 100(DX), SI
 	JMP  check_maxskip_cont_encodeBetterBlockAsm4MB
 
 check_maxskip_ok_encodeBetterBlockAsm4MB:
-	LEAL 1(CX)(BX*1), BX
+	LEAL 1(DX)(SI*1), SI
 
 check_maxskip_cont_encodeBetterBlockAsm4MB:
-	CMPL  BX, 8(SP)
+	CMPL  SI, 8(SP)
 	JAE   emit_remainder_encodeBetterBlockAsm4MB
-	MOVQ  (DX)(CX*1), SI
-	MOVL  BX, 20(SP)
-	MOVQ  $0x00cf1bbcdcbfa563, R8
-	MOVQ  $0x9e3779b1, BX
-	MOVQ  SI, R9
-	MOVQ  SI, R10
-	SHLQ  $0x08, R9
-	IMULQ R8, R9
-	SHRQ  $0x2f, R9
-	SHLQ  $0x20, R10
-	IMULQ BX, R10
-	SHRQ  $0x32, R10
-	MOVL  24(SP)(R9*4), BX
-	MOVL  524312(SP)(R10*4), DI
-	MOVL  CX, 24(SP)(R9*4)
-	MOVL  CX, 524312(SP)(R10*4)
-	MOVQ  (DX)(BX*1), R9
-	MOVQ  (DX)(DI*1), R10
-	CMPQ  R9, SI
+	MOVQ  (BX)(DX*1), DI
+	MOVL  SI, 20(SP)
+	MOVQ  $0x00cf1bbcdcbfa563, R9
+	MOVQ  $0x9e3779b1, SI
+	MOVQ  DI, R10
+	MOVQ  DI, R11
+	SHLQ  $0x08, R10
+	IMULQ R9, R10
+	SHRQ  $0x2f, R10
+	SHLQ  $0x20, R11
+	IMULQ SI, R11
+	SHRQ  $0x32, R11
+	MOVL  (AX)(R10*4), SI
+	MOVL  524288(AX)(R11*4), R8
+	MOVL  DX, (AX)(R10*4)
+	MOVL  DX, 524288(AX)(R11*4)
+	MOVQ  (BX)(SI*1), R10
+	MOVQ  (BX)(R8*1), R11
+	CMPQ  R10, DI
 	JEQ   candidate_match_encodeBetterBlockAsm4MB
-	CMPQ  R10, SI
+	CMPQ  R11, DI
 	JNE   no_short_found_encodeBetterBlockAsm4MB
-	MOVL  DI, BX
+	MOVL  R8, SI
 	JMP   candidate_match_encodeBetterBlockAsm4MB
 
 no_short_found_encodeBetterBlockAsm4MB:
-	CMPL R9, SI
+	CMPL R10, DI
 	JEQ  candidate_match_encodeBetterBlockAsm4MB
-	CMPL R10, SI
+	CMPL R11, DI
 	JEQ  candidateS_match_encodeBetterBlockAsm4MB
-	MOVL 20(SP), CX
+	MOVL 20(SP), DX
 	JMP  search_loop_encodeBetterBlockAsm4MB
 
 candidateS_match_encodeBetterBlockAsm4MB:
-	SHRQ  $0x08, SI
-	MOVQ  SI, R9
-	SHLQ  $0x08, R9
-	IMULQ R8, R9
-	SHRQ  $0x2f, R9
-	MOVL  24(SP)(R9*4), BX
-	INCL  CX
-	MOVL  CX, 24(SP)(R9*4)
-	CMPL  (DX)(BX*1), SI
+	SHRQ  $0x08, DI
+	MOVQ  DI, R10
+	SHLQ  $0x08, R10
+	IMULQ R9, R10
+	SHRQ  $0x2f, R10
+	MOVL  (AX)(R10*4), SI
+	INCL  DX
+	MOVL  DX, (AX)(R10*4)
+	CMPL  (BX)(SI*1), DI
 	JEQ   candidate_match_encodeBetterBlockAsm4MB
-	DECL  CX
-	MOVL  DI, BX
+	DECL  DX
+	MOVL  R8, SI
 
 candidate_match_encodeBetterBlockAsm4MB:
-	MOVL  12(SP), SI
-	TESTL BX, BX
+	MOVL  12(SP), DI
+	TESTL SI, SI
 	JZ    match_extend_back_end_encodeBetterBlockAsm4MB
 
 match_extend_back_loop_encodeBetterBlockAsm4MB:
-	CMPL CX, SI
+	CMPL DX, DI
 	JBE  match_extend_back_end_encodeBetterBlockAsm4MB
-	MOVB -1(DX)(BX*1), DI
-	MOVB -1(DX)(CX*1), R8
-	CMPB DI, R8
+	MOVB -1(BX)(SI*1), R8
+	MOVB -1(BX)(DX*1), R9
+	CMPB R8, R9
 	JNE  match_extend_back_end_encodeBetterBlockAsm4MB
-	LEAL -1(CX), CX
-	DECL BX
+	LEAL -1(DX), DX
+	DECL SI
 	JZ   match_extend_back_end_encodeBetterBlockAsm4MB
 	JMP  match_extend_back_loop_encodeBetterBlockAsm4MB
 
 match_extend_back_end_encodeBetterBlockAsm4MB:
-	MOVL CX, SI
-	SUBL 12(SP), SI
-	LEAQ 4(AX)(SI*1), SI
-	CMPQ SI, (SP)
+	MOVL DX, DI
+	SUBL 12(SP), DI
+	LEAQ 4(CX)(DI*1), DI
+	CMPQ DI, (SP)
 	JB   match_dst_size_check_encodeBetterBlockAsm4MB
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 match_dst_size_check_encodeBetterBlockAsm4MB:
-	MOVL CX, SI
-	ADDL $0x04, CX
-	ADDL $0x04, BX
-	MOVQ src_len+32(FP), DI
-	SUBL CX, DI
-	LEAQ (DX)(CX*1), R8
-	LEAQ (DX)(BX*1), R9
+	MOVL DX, DI
+	ADDL $0x04, DX
+	ADDL $0x04, SI
+	MOVQ src_len+32(FP), R8
+	SUBL DX, R8
+	LEAQ (BX)(DX*1), R9
+	LEAQ (BX)(SI*1), R10
 
 	// matchLen
-	XORL R11, R11
+	XORL R12, R12
 
 matchlen_loopback_16_match_nolit_encodeBetterBlockAsm4MB:
-	CMPL DI, $0x10
+	CMPL R8, $0x10
 	JB   matchlen_match8_match_nolit_encodeBetterBlockAsm4MB
-	MOVQ (R8)(R11*1), R10
-	MOVQ 8(R8)(R11*1), R12
-	XORQ (R9)(R11*1), R10
+	MOVQ (R9)(R12*1), R11
+	MOVQ 8(R9)(R12*1), R13
+	XORQ (R10)(R12*1), R11
 	JNZ  matchlen_bsf_8_match_nolit_encodeBetterBlockAsm4MB
-	XORQ 8(R9)(R11*1), R12
+	XORQ 8(R10)(R12*1), R13
 	JNZ  matchlen_bsf_16match_nolit_encodeBetterBlockAsm4MB
-	LEAL -16(DI), DI
-	LEAL 16(R11), R11
+	LEAL -16(R8), R8
+	LEAL 16(R12), R12
 	JMP  matchlen_loopback_16_match_nolit_encodeBetterBlockAsm4MB
 
 matchlen_bsf_16match_nolit_encodeBetterBlockAsm4MB:
 #ifdef GOAMD64_v3
-	TZCNTQ R12, R12
+	TZCNTQ R13, R13
 
 #else
-	BSFQ R12, R12
+	BSFQ R13, R13
 
 #endif
-	SARQ $0x03, R12
-	LEAL 8(R11)(R12*1), R11
+	SARQ $0x03, R13
+	LEAL 8(R12)(R13*1), R12
 	JMP  match_nolit_end_encodeBetterBlockAsm4MB
 
 matchlen_match8_match_nolit_encodeBetterBlockAsm4MB:
-	CMPL DI, $0x08
+	CMPL R8, $0x08
 	JB   matchlen_match4_match_nolit_encodeBetterBlockAsm4MB
-	MOVQ (R8)(R11*1), R10
-	XORQ (R9)(R11*1), R10
+	MOVQ (R9)(R12*1), R11
+	XORQ (R10)(R12*1), R11
 	JNZ  matchlen_bsf_8_match_nolit_encodeBetterBlockAsm4MB
-	LEAL -8(DI), DI
-	LEAL 8(R11), R11
+	LEAL -8(R8), R8
+	LEAL 8(R12), R12
 	JMP  matchlen_match4_match_nolit_encodeBetterBlockAsm4MB
 
 matchlen_bsf_8_match_nolit_encodeBetterBlockAsm4MB:
 #ifdef GOAMD64_v3
-	TZCNTQ R10, R10
+	TZCNTQ R11, R11
 
 #else
-	BSFQ R10, R10
+	BSFQ R11, R11
 
 #endif
-	SARQ $0x03, R10
-	LEAL (R11)(R10*1), R11
+	SARQ $0x03, R11
+	LEAL (R12)(R11*1), R12
 	JMP  match_nolit_end_encodeBetterBlockAsm4MB
 
 matchlen_match4_match_nolit_encodeBetterBlockAsm4MB:
-	CMPL DI, $0x04
+	CMPL R8, $0x04
 	JB   matchlen_match2_match_nolit_encodeBetterBlockAsm4MB
-	MOVL (R8)(R11*1), R10
-	CMPL (R9)(R11*1), R10
+	MOVL (R9)(R12*1), R11
+	CMPL (R10)(R12*1), R11
 	JNE  matchlen_match2_match_nolit_encodeBetterBlockAsm4MB
-	LEAL -4(DI), DI
-	LEAL 4(R11), R11
+	LEAL -4(R8), R8
+	LEAL 4(R12), R12
 
 matchlen_match2_match_nolit_encodeBetterBlockAsm4MB:
-	CMPL DI, $0x01
+	CMPL R8, $0x01
 	JE   matchlen_match1_match_nolit_encodeBetterBlockAsm4MB
 	JB   match_nolit_end_encodeBetterBlockAsm4MB
-	MOVW (R8)(R11*1), R10
-	CMPW (R9)(R11*1), R10
+	MOVW (R9)(R12*1), R11
+	CMPW (R10)(R12*1), R11
 	JNE  matchlen_match1_match_nolit_encodeBetterBlockAsm4MB
-	LEAL 2(R11), R11
-	SUBL $0x02, DI
+	LEAL 2(R12), R12
+	SUBL $0x02, R8
 	JZ   match_nolit_end_encodeBetterBlockAsm4MB
 
 matchlen_match1_match_nolit_encodeBetterBlockAsm4MB:
-	MOVB (R8)(R11*1), R10
-	CMPB (R9)(R11*1), R10
+	MOVB (R9)(R12*1), R11
+	CMPB (R10)(R12*1), R11
 	JNE  match_nolit_end_encodeBetterBlockAsm4MB
-	LEAL 1(R11), R11
+	LEAL 1(R12), R12
 
 match_nolit_end_encodeBetterBlockAsm4MB:
-	MOVL CX, DI
-	SUBL BX, DI
+	MOVL DX, R8
+	SUBL SI, R8
 
 	// Check if repeat
-	CMPL 16(SP), DI
+	CMPL 16(SP), R8
 	JEQ  match_is_repeat_encodeBetterBlockAsm4MB
-	CMPL R11, $0x01
+	CMPL R12, $0x01
 	JA   match_length_ok_encodeBetterBlockAsm4MB
-	CMPL DI, $0x0000ffff
+	CMPL R8, $0x0000ffff
 	JBE  match_length_ok_encodeBetterBlockAsm4MB
-	MOVL 20(SP), CX
-	INCL CX
+	MOVL 20(SP), DX
+	INCL DX
 	JMP  search_loop_encodeBetterBlockAsm4MB
 
 match_length_ok_encodeBetterBlockAsm4MB:
-	MOVL DI, 16(SP)
-	MOVL 12(SP), BX
-	CMPL BX, SI
+	MOVL R8, 16(SP)
+	MOVL 12(SP), SI
+	CMPL SI, DI
 	JEQ  emit_literal_done_match_emit_encodeBetterBlockAsm4MB
-	MOVL SI, R8
-	MOVL SI, 12(SP)
-	LEAQ (DX)(BX*1), R9
-	SUBL BX, R8
-	LEAL -1(R8), BX
-	CMPL BX, $0x3c
+	MOVL DI, R9
+	MOVL DI, 12(SP)
+	LEAQ (BX)(SI*1), R10
+	SUBL SI, R9
+	LEAL -1(R9), SI
+	CMPL SI, $0x3c
 	JB   one_byte_match_emit_encodeBetterBlockAsm4MB
-	CMPL BX, $0x00000100
+	CMPL SI, $0x00000100
 	JB   two_bytes_match_emit_encodeBetterBlockAsm4MB
-	CMPL BX, $0x00010000
+	CMPL SI, $0x00010000
 	JB   three_bytes_match_emit_encodeBetterBlockAsm4MB
-	MOVL BX, R10
-	SHRL $0x10, R10
-	MOVB $0xf8, (AX)
-	MOVW BX, 1(AX)
-	MOVB R10, 3(AX)
-	ADDQ $0x04, AX
+	MOVL SI, R11
+	SHRL $0x10, R11
+	MOVB $0xf8, (CX)
+	MOVW SI, 1(CX)
+	MOVB R11, 3(CX)
+	ADDQ $0x04, CX
 	JMP  memmove_long_match_emit_encodeBetterBlockAsm4MB
 
 three_bytes_match_emit_encodeBetterBlockAsm4MB:
-	MOVB $0xf4, (AX)
-	MOVW BX, 1(AX)
-	ADDQ $0x03, AX
+	MOVB $0xf4, (CX)
+	MOVW SI, 1(CX)
+	ADDQ $0x03, CX
 	JMP  memmove_long_match_emit_encodeBetterBlockAsm4MB
 
 two_bytes_match_emit_encodeBetterBlockAsm4MB:
-	MOVB $0xf0, (AX)
-	MOVB BL, 1(AX)
-	ADDQ $0x02, AX
-	CMPL BX, $0x40
+	MOVB $0xf0, (CX)
+	MOVB SI, 1(CX)
+	ADDQ $0x02, CX
+	CMPL SI, $0x40
 	JB   memmove_match_emit_encodeBetterBlockAsm4MB
 	JMP  memmove_long_match_emit_encodeBetterBlockAsm4MB
 
 one_byte_match_emit_encodeBetterBlockAsm4MB:
-	SHLB $0x02, BL
-	MOVB BL, (AX)
-	ADDQ $0x01, AX
+	SHLB $0x02, SI
+	MOVB SI, (CX)
+	ADDQ $0x01, CX
 
 memmove_match_emit_encodeBetterBlockAsm4MB:
-	LEAQ (AX)(R8*1), BX
+	LEAQ (CX)(R9*1), SI
 
 	// genMemMoveShort
-	CMPQ R8, $0x04
+	CMPQ R9, $0x04
 	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4
-	CMPQ R8, $0x08
+	CMPQ R9, $0x08
 	JB   emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4through7
-	CMPQ R8, $0x10
+	CMPQ R9, $0x10
 	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_8through16
-	CMPQ R8, $0x20
+	CMPQ R9, $0x20
 	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_17through32
 	JMP  emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_33through64
 
 emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4:
-	MOVL (R9), R10
-	MOVL R10, (AX)
+	MOVL (R10), R11
+	MOVL R11, (CX)
 	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm4MB
 
 emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_4through7:
-	MOVL (R9), R10
-	MOVL -4(R9)(R8*1), R9
-	MOVL R10, (AX)
-	MOVL R9, -4(AX)(R8*1)
+	MOVL (R10), R11
+	MOVL -4(R10)(R9*1), R10
+	MOVL R11, (CX)
+	MOVL R10, -4(CX)(R9*1)
 	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm4MB
 
 emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_8through16:
-	MOVQ (R9), R10
-	MOVQ -8(R9)(R8*1), R9
-	MOVQ R10, (AX)
-	MOVQ R9, -8(AX)(R8*1)
+	MOVQ (R10), R11
+	MOVQ -8(R10)(R9*1), R10
+	MOVQ R11, (CX)
+	MOVQ R10, -8(CX)(R9*1)
 	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm4MB
 
 emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_17through32:
-	MOVOU (R9), X0
-	MOVOU -16(R9)(R8*1), X1
-	MOVOU X0, (AX)
-	MOVOU X1, -16(AX)(R8*1)
+	MOVOU (R10), X0
+	MOVOU -16(R10)(R9*1), X1
+	MOVOU X0, (CX)
+	MOVOU X1, -16(CX)(R9*1)
 	JMP   memmove_end_copy_match_emit_encodeBetterBlockAsm4MB
 
 emit_lit_memmove_match_emit_encodeBetterBlockAsm4MB_memmove_move_33through64:
-	MOVOU (R9), X0
-	MOVOU 16(R9), X1
-	MOVOU -32(R9)(R8*1), X2
-	MOVOU -16(R9)(R8*1), X3
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(R8*1)
-	MOVOU X3, -16(AX)(R8*1)
+	MOVOU (R10), X0
+	MOVOU 16(R10), X1
+	MOVOU -32(R10)(R9*1), X2
+	MOVOU -16(R10)(R9*1), X3
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R9*1)
+	MOVOU X3, -16(CX)(R9*1)
 
 memmove_end_copy_match_emit_encodeBetterBlockAsm4MB:
-	MOVQ BX, AX
+	MOVQ SI, CX
 	JMP  emit_literal_done_match_emit_encodeBetterBlockAsm4MB
 
 memmove_long_match_emit_encodeBetterBlockAsm4MB:
-	LEAQ (AX)(R8*1), BX
+	LEAQ (CX)(R9*1), SI
 
 	// genMemMoveLong
-	MOVOU (R9), X0
-	MOVOU 16(R9), X1
-	MOVOU -32(R9)(R8*1), X2
-	MOVOU -16(R9)(R8*1), X3
-	MOVQ  R8, R12
-	SHRQ  $0x05, R12
-	MOVQ  AX, R10
-	ANDL  $0x0000001f, R10
-	MOVQ  $0x00000040, R13
-	SUBQ  R10, R13
-	DECQ  R12
+	MOVOU (R10), X0
+	MOVOU 16(R10), X1
+	MOVOU -32(R10)(R9*1), X2
+	MOVOU -16(R10)(R9*1), X3
+	MOVQ  R9, R13
+	SHRQ  $0x05, R13
+	MOVQ  CX, R11
+	ANDL  $0x0000001f, R11
+	MOVQ  $0x00000040, R14
+	SUBQ  R11, R14
+	DECQ  R13
 	JA    emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
-	LEAQ  -32(R9)(R13*1), R10
-	LEAQ  -32(AX)(R13*1), R14
+	LEAQ  -32(R10)(R14*1), R11
+	LEAQ  -32(CX)(R14*1), R15
 
 emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_big_loop_back:
-	MOVOU (R10), X4
-	MOVOU 16(R10), X5
-	MOVOA X4, (R14)
-	MOVOA X5, 16(R14)
+	MOVOU (R11), X4
+	MOVOU 16(R11), X5
+	MOVOA X4, (R15)
+	MOVOA X5, 16(R15)
+	ADDQ  $0x20, R15
+	ADDQ  $0x20, R11
 	ADDQ  $0x20, R14
-	ADDQ  $0x20, R10
-	ADDQ  $0x20, R13
-	DECQ  R12
+	DECQ  R13
 	JNA   emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_big_loop_back
 
 emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32:
-	MOVOU -32(R9)(R13*1), X4
-	MOVOU -16(R9)(R13*1), X5
-	MOVOA X4, -32(AX)(R13*1)
-	MOVOA X5, -16(AX)(R13*1)
-	ADDQ  $0x20, R13
-	CMPQ  R8, R13
+	MOVOU -32(R10)(R14*1), X4
+	MOVOU -16(R10)(R14*1), X5
+	MOVOA X4, -32(CX)(R14*1)
+	MOVOA X5, -16(CX)(R14*1)
+	ADDQ  $0x20, R14
+	CMPQ  R9, R14
 	JAE   emit_lit_memmove_long_match_emit_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(R8*1)
-	MOVOU X3, -16(AX)(R8*1)
-	MOVQ  BX, AX
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R9*1)
+	MOVOU X3, -16(CX)(R9*1)
+	MOVQ  SI, CX
 
 emit_literal_done_match_emit_encodeBetterBlockAsm4MB:
-	ADDL R11, CX
-	ADDL $0x04, R11
-	MOVL CX, 12(SP)
+	ADDL R12, DX
+	ADDL $0x04, R12
+	MOVL DX, 12(SP)
 
 	// emitCopy
-	CMPL DI, $0x00010000
+	CMPL R8, $0x00010000
 	JB   two_byte_offset_match_nolit_encodeBetterBlockAsm4MB
-	CMPL R11, $0x40
+	CMPL R12, $0x40
 	JBE  four_bytes_remain_match_nolit_encodeBetterBlockAsm4MB
-	MOVB $0xff, (AX)
-	MOVL DI, 1(AX)
-	LEAL -64(R11), R11
-	ADDQ $0x05, AX
-	CMPL R11, $0x04
+	MOVB $0xff, (CX)
+	MOVL R8, 1(CX)
+	LEAL -64(R12), R12
+	ADDQ $0x05, CX
+	CMPL R12, $0x04
 	JB   four_bytes_remain_match_nolit_encodeBetterBlockAsm4MB
 
 	// emitRepeat
-	MOVL R11, BX
-	LEAL -4(R11), R11
-	CMPL BX, $0x08
+	MOVL R12, SI
+	LEAL -4(R12), R12
+	CMPL SI, $0x08
 	JBE  repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy
-	CMPL BX, $0x0c
+	CMPL SI, $0x0c
 	JAE  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy
-	CMPL DI, $0x00000800
+	CMPL R8, $0x00000800
 	JB   repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy
 
 cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy:
-	CMPL R11, $0x00000104
+	CMPL R12, $0x00000104
 	JB   repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy
-	CMPL R11, $0x00010100
+	CMPL R12, $0x00010100
 	JB   repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy
-	LEAL -65536(R11), R11
-	MOVL R11, DI
-	MOVW $0x001d, (AX)
-	MOVW R11, 2(AX)
-	SARL $0x10, DI
-	MOVB DI, 4(AX)
-	ADDQ $0x05, AX
+	LEAL -65536(R12), R12
+	MOVL R12, R8
+	MOVW $0x001d, (CX)
+	MOVW R12, 2(CX)
+	SARL $0x10, R8
+	MOVB R8, 4(CX)
+	ADDQ $0x05, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
 
 repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy:
-	LEAL -256(R11), R11
-	MOVW $0x0019, (AX)
-	MOVW R11, 2(AX)
-	ADDQ $0x04, AX
+	LEAL -256(R12), R12
+	MOVW $0x0019, (CX)
+	MOVW R12, 2(CX)
+	ADDQ $0x04, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
 
 repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy:
-	LEAL -4(R11), R11
-	MOVW $0x0015, (AX)
-	MOVB R11, 2(AX)
-	ADDQ $0x03, AX
+	LEAL -4(R12), R12
+	MOVW $0x0015, (CX)
+	MOVB R12, 2(CX)
+	ADDQ $0x03, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
 
 repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy:
-	SHLL $0x02, R11
-	ORL  $0x01, R11
-	MOVW R11, (AX)
-	ADDQ $0x02, AX
+	SHLL $0x02, R12
+	ORL  $0x01, R12
+	MOVW R12, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
 
 repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy:
-	XORQ BX, BX
-	LEAL 1(BX)(R11*4), R11
-	MOVB DI, 1(AX)
-	SARL $0x08, DI
-	SHLL $0x05, DI
-	ORL  DI, R11
-	MOVB R11, (AX)
-	ADDQ $0x02, AX
+	XORQ SI, SI
+	LEAL 1(SI)(R12*4), R12
+	MOVB R8, 1(CX)
+	SARL $0x08, R8
+	SHLL $0x05, R8
+	ORL  R8, R12
+	MOVB R12, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
 
 four_bytes_remain_match_nolit_encodeBetterBlockAsm4MB:
-	TESTL R11, R11
+	TESTL R12, R12
 	JZ    match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
-	XORL  BX, BX
-	LEAL  -1(BX)(R11*4), R11
-	MOVB  R11, (AX)
-	MOVL  DI, 1(AX)
-	ADDQ  $0x05, AX
+	XORL  SI, SI
+	LEAL  -1(SI)(R12*4), R12
+	MOVB  R12, (CX)
+	MOVL  R8, 1(CX)
+	ADDQ  $0x05, CX
 	JMP   match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
 
 two_byte_offset_match_nolit_encodeBetterBlockAsm4MB:
-	CMPL R11, $0x40
+	CMPL R12, $0x40
 	JBE  two_byte_offset_short_match_nolit_encodeBetterBlockAsm4MB
-	CMPL DI, $0x00000800
+	CMPL R8, $0x00000800
 	JAE  long_offset_short_match_nolit_encodeBetterBlockAsm4MB
-	MOVL $0x00000001, BX
-	LEAL 16(BX), BX
-	MOVB DI, 1(AX)
-	SHRL $0x08, DI
-	SHLL $0x05, DI
-	ORL  DI, BX
-	MOVB BL, (AX)
-	ADDQ $0x02, AX
-	SUBL $0x08, R11
+	MOVL $0x00000001, SI
+	LEAL 16(SI), SI
+	MOVB R8, 1(CX)
+	SHRL $0x08, R8
+	SHLL $0x05, R8
+	ORL  R8, SI
+	MOVB SI, (CX)
+	ADDQ $0x02, CX
+	SUBL $0x08, R12
 
 	// emitRepeat
-	LEAL -4(R11), R11
+	LEAL -4(R12), R12
 	JMP  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b
-	MOVL R11, BX
-	LEAL -4(R11), R11
-	CMPL BX, $0x08
+	MOVL R12, SI
+	LEAL -4(R12), R12
+	CMPL SI, $0x08
 	JBE  repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b
-	CMPL BX, $0x0c
+	CMPL SI, $0x0c
 	JAE  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b
-	CMPL DI, $0x00000800
+	CMPL R8, $0x00000800
 	JB   repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b
 
 cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b:
-	CMPL R11, $0x00000104
+	CMPL R12, $0x00000104
 	JB   repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b
-	CMPL R11, $0x00010100
+	CMPL R12, $0x00010100
 	JB   repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b
-	LEAL -65536(R11), R11
-	MOVL R11, DI
-	MOVW $0x001d, (AX)
-	MOVW R11, 2(AX)
-	SARL $0x10, DI
-	MOVB DI, 4(AX)
-	ADDQ $0x05, AX
+	LEAL -65536(R12), R12
+	MOVL R12, R8
+	MOVW $0x001d, (CX)
+	MOVW R12, 2(CX)
+	SARL $0x10, R8
+	MOVB R8, 4(CX)
+	ADDQ $0x05, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
 
 repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b:
-	LEAL -256(R11), R11
-	MOVW $0x0019, (AX)
-	MOVW R11, 2(AX)
-	ADDQ $0x04, AX
+	LEAL -256(R12), R12
+	MOVW $0x0019, (CX)
+	MOVW R12, 2(CX)
+	ADDQ $0x04, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
 
 repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b:
-	LEAL -4(R11), R11
-	MOVW $0x0015, (AX)
-	MOVB R11, 2(AX)
-	ADDQ $0x03, AX
+	LEAL -4(R12), R12
+	MOVW $0x0015, (CX)
+	MOVB R12, 2(CX)
+	ADDQ $0x03, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
 
 repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b:
-	SHLL $0x02, R11
-	ORL  $0x01, R11
-	MOVW R11, (AX)
-	ADDQ $0x02, AX
+	SHLL $0x02, R12
+	ORL  $0x01, R12
+	MOVW R12, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
 
 repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short_2b:
-	XORQ BX, BX
-	LEAL 1(BX)(R11*4), R11
-	MOVB DI, 1(AX)
-	SARL $0x08, DI
-	SHLL $0x05, DI
-	ORL  DI, R11
-	MOVB R11, (AX)
-	ADDQ $0x02, AX
+	XORQ SI, SI
+	LEAL 1(SI)(R12*4), R12
+	MOVB R8, 1(CX)
+	SARL $0x08, R8
+	SHLL $0x05, R8
+	ORL  R8, R12
+	MOVB R12, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
 
 long_offset_short_match_nolit_encodeBetterBlockAsm4MB:
-	MOVB $0xee, (AX)
-	MOVW DI, 1(AX)
-	LEAL -60(R11), R11
-	ADDQ $0x03, AX
+	MOVB $0xee, (CX)
+	MOVW R8, 1(CX)
+	LEAL -60(R12), R12
+	ADDQ $0x03, CX
 
 	// emitRepeat
-	MOVL R11, BX
-	LEAL -4(R11), R11
-	CMPL BX, $0x08
+	MOVL R12, SI
+	LEAL -4(R12), R12
+	CMPL SI, $0x08
 	JBE  repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short
-	CMPL BX, $0x0c
+	CMPL SI, $0x0c
 	JAE  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short
-	CMPL DI, $0x00000800
+	CMPL R8, $0x00000800
 	JB   repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short
 
 cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short:
-	CMPL R11, $0x00000104
+	CMPL R12, $0x00000104
 	JB   repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short
-	CMPL R11, $0x00010100
+	CMPL R12, $0x00010100
 	JB   repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short
-	LEAL -65536(R11), R11
-	MOVL R11, DI
-	MOVW $0x001d, (AX)
-	MOVW R11, 2(AX)
-	SARL $0x10, DI
-	MOVB DI, 4(AX)
-	ADDQ $0x05, AX
+	LEAL -65536(R12), R12
+	MOVL R12, R8
+	MOVW $0x001d, (CX)
+	MOVW R12, 2(CX)
+	SARL $0x10, R8
+	MOVB R8, 4(CX)
+	ADDQ $0x05, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
 
 repeat_four_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short:
-	LEAL -256(R11), R11
-	MOVW $0x0019, (AX)
-	MOVW R11, 2(AX)
-	ADDQ $0x04, AX
+	LEAL -256(R12), R12
+	MOVW $0x0019, (CX)
+	MOVW R12, 2(CX)
+	ADDQ $0x04, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
 
 repeat_three_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short:
-	LEAL -4(R11), R11
-	MOVW $0x0015, (AX)
-	MOVB R11, 2(AX)
-	ADDQ $0x03, AX
+	LEAL -4(R12), R12
+	MOVW $0x0015, (CX)
+	MOVB R12, 2(CX)
+	ADDQ $0x03, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
 
 repeat_two_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short:
-	SHLL $0x02, R11
-	ORL  $0x01, R11
-	MOVW R11, (AX)
-	ADDQ $0x02, AX
+	SHLL $0x02, R12
+	ORL  $0x01, R12
+	MOVW R12, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
 
 repeat_two_offset_match_nolit_encodeBetterBlockAsm4MB_emit_copy_short:
-	XORQ BX, BX
-	LEAL 1(BX)(R11*4), R11
-	MOVB DI, 1(AX)
-	SARL $0x08, DI
-	SHLL $0x05, DI
-	ORL  DI, R11
-	MOVB R11, (AX)
-	ADDQ $0x02, AX
+	XORQ SI, SI
+	LEAL 1(SI)(R12*4), R12
+	MOVB R8, 1(CX)
+	SARL $0x08, R8
+	SHLL $0x05, R8
+	ORL  R8, R12
+	MOVB R12, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
 
 two_byte_offset_short_match_nolit_encodeBetterBlockAsm4MB:
-	MOVL R11, BX
-	SHLL $0x02, BX
-	CMPL R11, $0x0c
+	MOVL R12, SI
+	SHLL $0x02, SI
+	CMPL R12, $0x0c
 	JAE  emit_copy_three_match_nolit_encodeBetterBlockAsm4MB
-	CMPL DI, $0x00000800
+	CMPL R8, $0x00000800
 	JAE  emit_copy_three_match_nolit_encodeBetterBlockAsm4MB
-	LEAL -15(BX), BX
-	MOVB DI, 1(AX)
-	SHRL $0x08, DI
-	SHLL $0x05, DI
-	ORL  DI, BX
-	MOVB BL, (AX)
-	ADDQ $0x02, AX
+	LEAL -15(SI), SI
+	MOVB R8, 1(CX)
+	SHRL $0x08, R8
+	SHLL $0x05, R8
+	ORL  R8, SI
+	MOVB SI, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
 
 emit_copy_three_match_nolit_encodeBetterBlockAsm4MB:
-	LEAL -2(BX), BX
-	MOVB BL, (AX)
-	MOVW DI, 1(AX)
-	ADDQ $0x03, AX
+	LEAL -2(SI), SI
+	MOVB SI, (CX)
+	MOVW R8, 1(CX)
+	ADDQ $0x03, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
 
 match_is_repeat_encodeBetterBlockAsm4MB:
-	MOVL 12(SP), BX
-	CMPL BX, SI
+	MOVL 12(SP), SI
+	CMPL SI, DI
 	JEQ  emit_literal_done_match_emit_repeat_encodeBetterBlockAsm4MB
-	MOVL SI, R8
-	MOVL SI, 12(SP)
-	LEAQ (DX)(BX*1), R9
-	SUBL BX, R8
-	LEAL -1(R8), BX
-	CMPL BX, $0x3c
+	MOVL DI, R9
+	MOVL DI, 12(SP)
+	LEAQ (BX)(SI*1), R10
+	SUBL SI, R9
+	LEAL -1(R9), SI
+	CMPL SI, $0x3c
 	JB   one_byte_match_emit_repeat_encodeBetterBlockAsm4MB
-	CMPL BX, $0x00000100
+	CMPL SI, $0x00000100
 	JB   two_bytes_match_emit_repeat_encodeBetterBlockAsm4MB
-	CMPL BX, $0x00010000
+	CMPL SI, $0x00010000
 	JB   three_bytes_match_emit_repeat_encodeBetterBlockAsm4MB
-	MOVL BX, R10
-	SHRL $0x10, R10
-	MOVB $0xf8, (AX)
-	MOVW BX, 1(AX)
-	MOVB R10, 3(AX)
-	ADDQ $0x04, AX
+	MOVL SI, R11
+	SHRL $0x10, R11
+	MOVB $0xf8, (CX)
+	MOVW SI, 1(CX)
+	MOVB R11, 3(CX)
+	ADDQ $0x04, CX
 	JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB
 
 three_bytes_match_emit_repeat_encodeBetterBlockAsm4MB:
-	MOVB $0xf4, (AX)
-	MOVW BX, 1(AX)
-	ADDQ $0x03, AX
+	MOVB $0xf4, (CX)
+	MOVW SI, 1(CX)
+	ADDQ $0x03, CX
 	JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB
 
 two_bytes_match_emit_repeat_encodeBetterBlockAsm4MB:
-	MOVB $0xf0, (AX)
-	MOVB BL, 1(AX)
-	ADDQ $0x02, AX
-	CMPL BX, $0x40
+	MOVB $0xf0, (CX)
+	MOVB SI, 1(CX)
+	ADDQ $0x02, CX
+	CMPL SI, $0x40
 	JB   memmove_match_emit_repeat_encodeBetterBlockAsm4MB
 	JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB
 
 one_byte_match_emit_repeat_encodeBetterBlockAsm4MB:
-	SHLB $0x02, BL
-	MOVB BL, (AX)
-	ADDQ $0x01, AX
+	SHLB $0x02, SI
+	MOVB SI, (CX)
+	ADDQ $0x01, CX
 
 memmove_match_emit_repeat_encodeBetterBlockAsm4MB:
-	LEAQ (AX)(R8*1), BX
+	LEAQ (CX)(R9*1), SI
 
 	// genMemMoveShort
-	CMPQ R8, $0x04
+	CMPQ R9, $0x04
 	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4
-	CMPQ R8, $0x08
+	CMPQ R9, $0x08
 	JB   emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4through7
-	CMPQ R8, $0x10
+	CMPQ R9, $0x10
 	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_8through16
-	CMPQ R8, $0x20
+	CMPQ R9, $0x20
 	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_17through32
 	JMP  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_33through64
 
 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4:
-	MOVL (R9), R10
-	MOVL R10, (AX)
+	MOVL (R10), R11
+	MOVL R11, (CX)
 	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB
 
 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_4through7:
-	MOVL (R9), R10
-	MOVL -4(R9)(R8*1), R9
-	MOVL R10, (AX)
-	MOVL R9, -4(AX)(R8*1)
+	MOVL (R10), R11
+	MOVL -4(R10)(R9*1), R10
+	MOVL R11, (CX)
+	MOVL R10, -4(CX)(R9*1)
 	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB
 
 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_8through16:
-	MOVQ (R9), R10
-	MOVQ -8(R9)(R8*1), R9
-	MOVQ R10, (AX)
-	MOVQ R9, -8(AX)(R8*1)
+	MOVQ (R10), R11
+	MOVQ -8(R10)(R9*1), R10
+	MOVQ R11, (CX)
+	MOVQ R10, -8(CX)(R9*1)
 	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB
 
 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_17through32:
-	MOVOU (R9), X0
-	MOVOU -16(R9)(R8*1), X1
-	MOVOU X0, (AX)
-	MOVOU X1, -16(AX)(R8*1)
+	MOVOU (R10), X0
+	MOVOU -16(R10)(R9*1), X1
+	MOVOU X0, (CX)
+	MOVOU X1, -16(CX)(R9*1)
 	JMP   memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB
 
 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm4MB_memmove_move_33through64:
-	MOVOU (R9), X0
-	MOVOU 16(R9), X1
-	MOVOU -32(R9)(R8*1), X2
-	MOVOU -16(R9)(R8*1), X3
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(R8*1)
-	MOVOU X3, -16(AX)(R8*1)
+	MOVOU (R10), X0
+	MOVOU 16(R10), X1
+	MOVOU -32(R10)(R9*1), X2
+	MOVOU -16(R10)(R9*1), X3
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R9*1)
+	MOVOU X3, -16(CX)(R9*1)
 
 memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm4MB:
-	MOVQ BX, AX
+	MOVQ SI, CX
 	JMP  emit_literal_done_match_emit_repeat_encodeBetterBlockAsm4MB
 
 memmove_long_match_emit_repeat_encodeBetterBlockAsm4MB:
-	LEAQ (AX)(R8*1), BX
+	LEAQ (CX)(R9*1), SI
 
 	// genMemMoveLong
-	MOVOU (R9), X0
-	MOVOU 16(R9), X1
-	MOVOU -32(R9)(R8*1), X2
-	MOVOU -16(R9)(R8*1), X3
-	MOVQ  R8, R12
-	SHRQ  $0x05, R12
-	MOVQ  AX, R10
-	ANDL  $0x0000001f, R10
-	MOVQ  $0x00000040, R13
-	SUBQ  R10, R13
-	DECQ  R12
+	MOVOU (R10), X0
+	MOVOU 16(R10), X1
+	MOVOU -32(R10)(R9*1), X2
+	MOVOU -16(R10)(R9*1), X3
+	MOVQ  R9, R13
+	SHRQ  $0x05, R13
+	MOVQ  CX, R11
+	ANDL  $0x0000001f, R11
+	MOVQ  $0x00000040, R14
+	SUBQ  R11, R14
+	DECQ  R13
 	JA    emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
-	LEAQ  -32(R9)(R13*1), R10
-	LEAQ  -32(AX)(R13*1), R14
+	LEAQ  -32(R10)(R14*1), R11
+	LEAQ  -32(CX)(R14*1), R15
 
 emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_big_loop_back:
-	MOVOU (R10), X4
-	MOVOU 16(R10), X5
-	MOVOA X4, (R14)
-	MOVOA X5, 16(R14)
+	MOVOU (R11), X4
+	MOVOU 16(R11), X5
+	MOVOA X4, (R15)
+	MOVOA X5, 16(R15)
+	ADDQ  $0x20, R15
+	ADDQ  $0x20, R11
 	ADDQ  $0x20, R14
-	ADDQ  $0x20, R10
-	ADDQ  $0x20, R13
-	DECQ  R12
+	DECQ  R13
 	JNA   emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_big_loop_back
 
 emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32:
-	MOVOU -32(R9)(R13*1), X4
-	MOVOU -16(R9)(R13*1), X5
-	MOVOA X4, -32(AX)(R13*1)
-	MOVOA X5, -16(AX)(R13*1)
-	ADDQ  $0x20, R13
-	CMPQ  R8, R13
+	MOVOU -32(R10)(R14*1), X4
+	MOVOU -16(R10)(R14*1), X5
+	MOVOA X4, -32(CX)(R14*1)
+	MOVOA X5, -16(CX)(R14*1)
+	ADDQ  $0x20, R14
+	CMPQ  R9, R14
 	JAE   emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(R8*1)
-	MOVOU X3, -16(AX)(R8*1)
-	MOVQ  BX, AX
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R9*1)
+	MOVOU X3, -16(CX)(R9*1)
+	MOVQ  SI, CX
 
 emit_literal_done_match_emit_repeat_encodeBetterBlockAsm4MB:
-	ADDL R11, CX
-	ADDL $0x04, R11
-	MOVL CX, 12(SP)
+	ADDL R12, DX
+	ADDL $0x04, R12
+	MOVL DX, 12(SP)
 
 	// emitRepeat
-	MOVL R11, BX
-	LEAL -4(R11), R11
-	CMPL BX, $0x08
+	MOVL R12, SI
+	LEAL -4(R12), R12
+	CMPL SI, $0x08
 	JBE  repeat_two_match_nolit_repeat_encodeBetterBlockAsm4MB
-	CMPL BX, $0x0c
+	CMPL SI, $0x0c
 	JAE  cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB
-	CMPL DI, $0x00000800
+	CMPL R8, $0x00000800
 	JB   repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB
 
 cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB:
-	CMPL R11, $0x00000104
+	CMPL R12, $0x00000104
 	JB   repeat_three_match_nolit_repeat_encodeBetterBlockAsm4MB
-	CMPL R11, $0x00010100
+	CMPL R12, $0x00010100
 	JB   repeat_four_match_nolit_repeat_encodeBetterBlockAsm4MB
-	LEAL -65536(R11), R11
-	MOVL R11, DI
-	MOVW $0x001d, (AX)
-	MOVW R11, 2(AX)
-	SARL $0x10, DI
-	MOVB DI, 4(AX)
-	ADDQ $0x05, AX
+	LEAL -65536(R12), R12
+	MOVL R12, R8
+	MOVW $0x001d, (CX)
+	MOVW R12, 2(CX)
+	SARL $0x10, R8
+	MOVB R8, 4(CX)
+	ADDQ $0x05, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
 
 repeat_four_match_nolit_repeat_encodeBetterBlockAsm4MB:
-	LEAL -256(R11), R11
-	MOVW $0x0019, (AX)
-	MOVW R11, 2(AX)
-	ADDQ $0x04, AX
+	LEAL -256(R12), R12
+	MOVW $0x0019, (CX)
+	MOVW R12, 2(CX)
+	ADDQ $0x04, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
 
 repeat_three_match_nolit_repeat_encodeBetterBlockAsm4MB:
-	LEAL -4(R11), R11
-	MOVW $0x0015, (AX)
-	MOVB R11, 2(AX)
-	ADDQ $0x03, AX
+	LEAL -4(R12), R12
+	MOVW $0x0015, (CX)
+	MOVB R12, 2(CX)
+	ADDQ $0x03, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
 
 repeat_two_match_nolit_repeat_encodeBetterBlockAsm4MB:
-	SHLL $0x02, R11
-	ORL  $0x01, R11
-	MOVW R11, (AX)
-	ADDQ $0x02, AX
+	SHLL $0x02, R12
+	ORL  $0x01, R12
+	MOVW R12, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm4MB
 
 repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm4MB:
-	XORQ BX, BX
-	LEAL 1(BX)(R11*4), R11
-	MOVB DI, 1(AX)
-	SARL $0x08, DI
-	SHLL $0x05, DI
-	ORL  DI, R11
-	MOVB R11, (AX)
-	ADDQ $0x02, AX
+	XORQ SI, SI
+	LEAL 1(SI)(R12*4), R12
+	MOVB R8, 1(CX)
+	SARL $0x08, R8
+	SHLL $0x05, R8
+	ORL  R8, R12
+	MOVB R12, (CX)
+	ADDQ $0x02, CX
 
 match_nolit_emitcopy_end_encodeBetterBlockAsm4MB:
-	CMPL CX, 8(SP)
+	CMPL DX, 8(SP)
 	JAE  emit_remainder_encodeBetterBlockAsm4MB
-	CMPQ AX, (SP)
+	CMPQ CX, (SP)
 	JB   match_nolit_dst_ok_encodeBetterBlockAsm4MB
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 match_nolit_dst_ok_encodeBetterBlockAsm4MB:
-	MOVQ  $0x00cf1bbcdcbfa563, BX
-	MOVQ  $0x9e3779b1, DI
-	LEAQ  1(SI), SI
-	LEAQ  -2(CX), R8
-	MOVQ  (DX)(SI*1), R9
-	MOVQ  1(DX)(SI*1), R10
-	MOVQ  (DX)(R8*1), R11
-	MOVQ  1(DX)(R8*1), R12
-	SHLQ  $0x08, R9
-	IMULQ BX, R9
-	SHRQ  $0x2f, R9
-	SHLQ  $0x20, R10
-	IMULQ DI, R10
-	SHRQ  $0x32, R10
-	SHLQ  $0x08, R11
-	IMULQ BX, R11
-	SHRQ  $0x2f, R11
-	SHLQ  $0x20, R12
-	IMULQ DI, R12
-	SHRQ  $0x32, R12
-	LEAQ  1(SI), DI
-	LEAQ  1(R8), R13
-	MOVL  SI, 24(SP)(R9*4)
-	MOVL  R8, 24(SP)(R11*4)
-	MOVL  DI, 524312(SP)(R10*4)
-	MOVL  R13, 524312(SP)(R12*4)
-	LEAQ  1(R8)(SI*1), DI
-	SHRQ  $0x01, DI
-	ADDQ  $0x01, SI
-	SUBQ  $0x01, R8
+	MOVQ  $0x00cf1bbcdcbfa563, SI
+	MOVQ  $0x9e3779b1, R8
+	LEAQ  1(DI), DI
+	LEAQ  -2(DX), R9
+	MOVQ  (BX)(DI*1), R10
+	MOVQ  1(BX)(DI*1), R11
+	MOVQ  (BX)(R9*1), R12
+	MOVQ  1(BX)(R9*1), R13
+	SHLQ  $0x08, R10
+	IMULQ SI, R10
+	SHRQ  $0x2f, R10
+	SHLQ  $0x20, R11
+	IMULQ R8, R11
+	SHRQ  $0x32, R11
+	SHLQ  $0x08, R12
+	IMULQ SI, R12
+	SHRQ  $0x2f, R12
+	SHLQ  $0x20, R13
+	IMULQ R8, R13
+	SHRQ  $0x32, R13
+	LEAQ  1(DI), R8
+	LEAQ  1(R9), R14
+	MOVL  DI, (AX)(R10*4)
+	MOVL  R9, (AX)(R12*4)
+	MOVL  R8, 524288(AX)(R11*4)
+	MOVL  R14, 524288(AX)(R13*4)
+	LEAQ  1(R9)(DI*1), R8
+	SHRQ  $0x01, R8
+	ADDQ  $0x01, DI
+	SUBQ  $0x01, R9
 
 index_loop_encodeBetterBlockAsm4MB:
-	CMPQ  DI, R8
+	CMPQ  R8, R9
 	JAE   search_loop_encodeBetterBlockAsm4MB
-	MOVQ  (DX)(SI*1), R9
-	MOVQ  (DX)(DI*1), R10
-	SHLQ  $0x08, R9
-	IMULQ BX, R9
-	SHRQ  $0x2f, R9
+	MOVQ  (BX)(DI*1), R10
+	MOVQ  (BX)(R8*1), R11
 	SHLQ  $0x08, R10
-	IMULQ BX, R10
+	IMULQ SI, R10
 	SHRQ  $0x2f, R10
-	MOVL  SI, 24(SP)(R9*4)
-	MOVL  DI, 24(SP)(R10*4)
-	ADDQ  $0x02, SI
+	SHLQ  $0x08, R11
+	IMULQ SI, R11
+	SHRQ  $0x2f, R11
+	MOVL  DI, (AX)(R10*4)
+	MOVL  R8, (AX)(R11*4)
 	ADDQ  $0x02, DI
+	ADDQ  $0x02, R8
 	JMP   index_loop_encodeBetterBlockAsm4MB
 
 emit_remainder_encodeBetterBlockAsm4MB:
-	MOVQ src_len+32(FP), CX
-	SUBL 12(SP), CX
-	LEAQ 4(AX)(CX*1), CX
-	CMPQ CX, (SP)
+	MOVQ src_len+32(FP), AX
+	SUBL 12(SP), AX
+	LEAQ 4(CX)(AX*1), AX
+	CMPQ AX, (SP)
 	JB   emit_remainder_ok_encodeBetterBlockAsm4MB
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 emit_remainder_ok_encodeBetterBlockAsm4MB:
-	MOVQ src_len+32(FP), CX
-	MOVL 12(SP), BX
-	CMPL BX, CX
+	MOVQ src_len+32(FP), AX
+	MOVL 12(SP), DX
+	CMPL DX, AX
 	JEQ  emit_literal_done_emit_remainder_encodeBetterBlockAsm4MB
-	MOVL CX, SI
-	MOVL CX, 12(SP)
-	LEAQ (DX)(BX*1), CX
-	SUBL BX, SI
+	MOVL AX, SI
+	MOVL AX, 12(SP)
+	LEAQ (BX)(DX*1), AX
+	SUBL DX, SI
 	LEAL -1(SI), DX
 	CMPL DX, $0x3c
 	JB   one_byte_emit_remainder_encodeBetterBlockAsm4MB
@@ -7968,33 +7975,33 @@ emit_remainder_ok_encodeBetterBlockAsm4MB:
 	JB   three_bytes_emit_remainder_encodeBetterBlockAsm4MB
 	MOVL DX, BX
 	SHRL $0x10, BX
-	MOVB $0xf8, (AX)
-	MOVW DX, 1(AX)
-	MOVB BL, 3(AX)
-	ADDQ $0x04, AX
+	MOVB $0xf8, (CX)
+	MOVW DX, 1(CX)
+	MOVB BL, 3(CX)
+	ADDQ $0x04, CX
 	JMP  memmove_long_emit_remainder_encodeBetterBlockAsm4MB
 
 three_bytes_emit_remainder_encodeBetterBlockAsm4MB:
-	MOVB $0xf4, (AX)
-	MOVW DX, 1(AX)
-	ADDQ $0x03, AX
+	MOVB $0xf4, (CX)
+	MOVW DX, 1(CX)
+	ADDQ $0x03, CX
 	JMP  memmove_long_emit_remainder_encodeBetterBlockAsm4MB
 
 two_bytes_emit_remainder_encodeBetterBlockAsm4MB:
-	MOVB $0xf0, (AX)
-	MOVB DL, 1(AX)
-	ADDQ $0x02, AX
+	MOVB $0xf0, (CX)
+	MOVB DL, 1(CX)
+	ADDQ $0x02, CX
 	CMPL DX, $0x40
 	JB   memmove_emit_remainder_encodeBetterBlockAsm4MB
 	JMP  memmove_long_emit_remainder_encodeBetterBlockAsm4MB
 
 one_byte_emit_remainder_encodeBetterBlockAsm4MB:
 	SHLB $0x02, DL
-	MOVB DL, (AX)
-	ADDQ $0x01, AX
+	MOVB DL, (CX)
+	ADDQ $0x01, CX
 
 memmove_emit_remainder_encodeBetterBlockAsm4MB:
-	LEAQ (AX)(SI*1), DX
+	LEAQ (CX)(SI*1), DX
 	MOVL SI, BX
 
 	// genMemMoveShort
@@ -8010,73 +8017,73 @@ memmove_emit_remainder_encodeBetterBlockAsm4MB:
 	JMP  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_33through64
 
 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_1or2:
-	MOVB (CX), SI
-	MOVB -1(CX)(BX*1), CL
-	MOVB SI, (AX)
-	MOVB CL, -1(AX)(BX*1)
+	MOVB (AX), SI
+	MOVB -1(AX)(BX*1), AL
+	MOVB SI, (CX)
+	MOVB AL, -1(CX)(BX*1)
 	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB
 
 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_3:
-	MOVW (CX), SI
-	MOVB 2(CX), CL
-	MOVW SI, (AX)
-	MOVB CL, 2(AX)
+	MOVW (AX), SI
+	MOVB 2(AX), AL
+	MOVW SI, (CX)
+	MOVB AL, 2(CX)
 	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB
 
 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_4through7:
-	MOVL (CX), SI
-	MOVL -4(CX)(BX*1), CX
-	MOVL SI, (AX)
-	MOVL CX, -4(AX)(BX*1)
+	MOVL (AX), SI
+	MOVL -4(AX)(BX*1), AX
+	MOVL SI, (CX)
+	MOVL AX, -4(CX)(BX*1)
 	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB
 
 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_8through16:
-	MOVQ (CX), SI
-	MOVQ -8(CX)(BX*1), CX
-	MOVQ SI, (AX)
-	MOVQ CX, -8(AX)(BX*1)
+	MOVQ (AX), SI
+	MOVQ -8(AX)(BX*1), AX
+	MOVQ SI, (CX)
+	MOVQ AX, -8(CX)(BX*1)
 	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB
 
 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_17through32:
-	MOVOU (CX), X0
-	MOVOU -16(CX)(BX*1), X1
-	MOVOU X0, (AX)
-	MOVOU X1, -16(AX)(BX*1)
+	MOVOU (AX), X0
+	MOVOU -16(AX)(BX*1), X1
+	MOVOU X0, (CX)
+	MOVOU X1, -16(CX)(BX*1)
 	JMP   memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB
 
 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm4MB_memmove_move_33through64:
-	MOVOU (CX), X0
-	MOVOU 16(CX), X1
-	MOVOU -32(CX)(BX*1), X2
-	MOVOU -16(CX)(BX*1), X3
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(BX*1)
-	MOVOU X3, -16(AX)(BX*1)
+	MOVOU (AX), X0
+	MOVOU 16(AX), X1
+	MOVOU -32(AX)(BX*1), X2
+	MOVOU -16(AX)(BX*1), X3
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(BX*1)
+	MOVOU X3, -16(CX)(BX*1)
 
 memmove_end_copy_emit_remainder_encodeBetterBlockAsm4MB:
-	MOVQ DX, AX
+	MOVQ DX, CX
 	JMP  emit_literal_done_emit_remainder_encodeBetterBlockAsm4MB
 
 memmove_long_emit_remainder_encodeBetterBlockAsm4MB:
-	LEAQ (AX)(SI*1), DX
+	LEAQ (CX)(SI*1), DX
 	MOVL SI, BX
 
 	// genMemMoveLong
-	MOVOU (CX), X0
-	MOVOU 16(CX), X1
-	MOVOU -32(CX)(BX*1), X2
-	MOVOU -16(CX)(BX*1), X3
+	MOVOU (AX), X0
+	MOVOU 16(AX), X1
+	MOVOU -32(AX)(BX*1), X2
+	MOVOU -16(AX)(BX*1), X3
 	MOVQ  BX, DI
 	SHRQ  $0x05, DI
-	MOVQ  AX, SI
+	MOVQ  CX, SI
 	ANDL  $0x0000001f, SI
 	MOVQ  $0x00000040, R8
 	SUBQ  SI, R8
 	DECQ  DI
 	JA    emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
-	LEAQ  -32(CX)(R8*1), SI
-	LEAQ  -32(AX)(R8*1), R9
+	LEAQ  -32(AX)(R8*1), SI
+	LEAQ  -32(CX)(R8*1), R9
 
 emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_big_loop_back:
 	MOVOU (SI), X4
@@ -8090,756 +8097,757 @@ emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_big_loop_back:
 	JNA   emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_big_loop_back
 
 emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32:
-	MOVOU -32(CX)(R8*1), X4
-	MOVOU -16(CX)(R8*1), X5
-	MOVOA X4, -32(AX)(R8*1)
-	MOVOA X5, -16(AX)(R8*1)
+	MOVOU -32(AX)(R8*1), X4
+	MOVOU -16(AX)(R8*1), X5
+	MOVOA X4, -32(CX)(R8*1)
+	MOVOA X5, -16(CX)(R8*1)
 	ADDQ  $0x20, R8
 	CMPQ  BX, R8
 	JAE   emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm4MBlarge_forward_sse_loop_32
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(BX*1)
-	MOVOU X3, -16(AX)(BX*1)
-	MOVQ  DX, AX
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(BX*1)
+	MOVOU X3, -16(CX)(BX*1)
+	MOVQ  DX, CX
 
 emit_literal_done_emit_remainder_encodeBetterBlockAsm4MB:
-	MOVQ dst_base+0(FP), CX
-	SUBQ CX, AX
-	MOVQ AX, ret+48(FP)
+	MOVQ dst_base+0(FP), AX
+	SUBQ AX, CX
+	MOVQ CX, ret+56(FP)
 	RET
 
-// func encodeBetterBlockAsm12B(dst []byte, src []byte) int
+// func encodeBetterBlockAsm12B(dst []byte, src []byte, tmp *[81920]byte) int
 // Requires: BMI, SSE2
-TEXT ·encodeBetterBlockAsm12B(SB), $81944-56
-	MOVQ dst_base+0(FP), AX
-	MOVQ $0x00000280, CX
-	LEAQ 24(SP), DX
+TEXT ·encodeBetterBlockAsm12B(SB), $24-64
+	MOVQ tmp+48(FP), AX
+	MOVQ dst_base+0(FP), CX
+	MOVQ $0x00000280, DX
+	MOVQ AX, BX
 	PXOR X0, X0
 
 zero_loop_encodeBetterBlockAsm12B:
-	MOVOU X0, (DX)
-	MOVOU X0, 16(DX)
-	MOVOU X0, 32(DX)
-	MOVOU X0, 48(DX)
-	MOVOU X0, 64(DX)
-	MOVOU X0, 80(DX)
-	MOVOU X0, 96(DX)
-	MOVOU X0, 112(DX)
-	ADDQ  $0x80, DX
-	DECQ  CX
+	MOVOU X0, (BX)
+	MOVOU X0, 16(BX)
+	MOVOU X0, 32(BX)
+	MOVOU X0, 48(BX)
+	MOVOU X0, 64(BX)
+	MOVOU X0, 80(BX)
+	MOVOU X0, 96(BX)
+	MOVOU X0, 112(BX)
+	ADDQ  $0x80, BX
+	DECQ  DX
 	JNZ   zero_loop_encodeBetterBlockAsm12B
 	MOVL  $0x00000000, 12(SP)
-	MOVQ  src_len+32(FP), CX
-	LEAQ  -6(CX), DX
-	LEAQ  -8(CX), BX
-	MOVL  BX, 8(SP)
-	SHRQ  $0x05, CX
-	SUBL  CX, DX
-	LEAQ  (AX)(DX*1), DX
-	MOVQ  DX, (SP)
-	MOVL  $0x00000001, CX
+	MOVQ  src_len+32(FP), DX
+	LEAQ  -6(DX), BX
+	LEAQ  -8(DX), SI
+	MOVL  SI, 8(SP)
+	SHRQ  $0x05, DX
+	SUBL  DX, BX
+	LEAQ  (CX)(BX*1), BX
+	MOVQ  BX, (SP)
+	MOVL  $0x00000001, DX
 	MOVL  $0x00000000, 16(SP)
-	MOVQ  src_base+24(FP), DX
+	MOVQ  src_base+24(FP), BX
 
 search_loop_encodeBetterBlockAsm12B:
-	MOVL  CX, BX
-	SUBL  12(SP), BX
-	SHRL  $0x06, BX
-	LEAL  1(CX)(BX*1), BX
-	CMPL  BX, 8(SP)
+	MOVL  DX, SI
+	SUBL  12(SP), SI
+	SHRL  $0x06, SI
+	LEAL  1(DX)(SI*1), SI
+	CMPL  SI, 8(SP)
 	JAE   emit_remainder_encodeBetterBlockAsm12B
-	MOVQ  (DX)(CX*1), SI
-	MOVL  BX, 20(SP)
-	MOVQ  $0x0000cf1bbcdcbf9b, R8
-	MOVQ  $0x9e3779b1, BX
-	MOVQ  SI, R9
-	MOVQ  SI, R10
-	SHLQ  $0x10, R9
-	IMULQ R8, R9
-	SHRQ  $0x32, R9
-	SHLQ  $0x20, R10
-	IMULQ BX, R10
-	SHRQ  $0x34, R10
-	MOVL  24(SP)(R9*4), BX
-	MOVL  65560(SP)(R10*4), DI
-	MOVL  CX, 24(SP)(R9*4)
-	MOVL  CX, 65560(SP)(R10*4)
-	MOVQ  (DX)(BX*1), R9
-	MOVQ  (DX)(DI*1), R10
-	CMPQ  R9, SI
+	MOVQ  (BX)(DX*1), DI
+	MOVL  SI, 20(SP)
+	MOVQ  $0x0000cf1bbcdcbf9b, R9
+	MOVQ  $0x9e3779b1, SI
+	MOVQ  DI, R10
+	MOVQ  DI, R11
+	SHLQ  $0x10, R10
+	IMULQ R9, R10
+	SHRQ  $0x32, R10
+	SHLQ  $0x20, R11
+	IMULQ SI, R11
+	SHRQ  $0x34, R11
+	MOVL  (AX)(R10*4), SI
+	MOVL  65536(AX)(R11*4), R8
+	MOVL  DX, (AX)(R10*4)
+	MOVL  DX, 65536(AX)(R11*4)
+	MOVQ  (BX)(SI*1), R10
+	MOVQ  (BX)(R8*1), R11
+	CMPQ  R10, DI
 	JEQ   candidate_match_encodeBetterBlockAsm12B
-	CMPQ  R10, SI
+	CMPQ  R11, DI
 	JNE   no_short_found_encodeBetterBlockAsm12B
-	MOVL  DI, BX
+	MOVL  R8, SI
 	JMP   candidate_match_encodeBetterBlockAsm12B
 
 no_short_found_encodeBetterBlockAsm12B:
-	CMPL R9, SI
+	CMPL R10, DI
 	JEQ  candidate_match_encodeBetterBlockAsm12B
-	CMPL R10, SI
+	CMPL R11, DI
 	JEQ  candidateS_match_encodeBetterBlockAsm12B
-	MOVL 20(SP), CX
+	MOVL 20(SP), DX
 	JMP  search_loop_encodeBetterBlockAsm12B
 
 candidateS_match_encodeBetterBlockAsm12B:
-	SHRQ  $0x08, SI
-	MOVQ  SI, R9
-	SHLQ  $0x10, R9
-	IMULQ R8, R9
-	SHRQ  $0x32, R9
-	MOVL  24(SP)(R9*4), BX
-	INCL  CX
-	MOVL  CX, 24(SP)(R9*4)
-	CMPL  (DX)(BX*1), SI
+	SHRQ  $0x08, DI
+	MOVQ  DI, R10
+	SHLQ  $0x10, R10
+	IMULQ R9, R10
+	SHRQ  $0x32, R10
+	MOVL  (AX)(R10*4), SI
+	INCL  DX
+	MOVL  DX, (AX)(R10*4)
+	CMPL  (BX)(SI*1), DI
 	JEQ   candidate_match_encodeBetterBlockAsm12B
-	DECL  CX
-	MOVL  DI, BX
+	DECL  DX
+	MOVL  R8, SI
 
 candidate_match_encodeBetterBlockAsm12B:
-	MOVL  12(SP), SI
-	TESTL BX, BX
+	MOVL  12(SP), DI
+	TESTL SI, SI
 	JZ    match_extend_back_end_encodeBetterBlockAsm12B
 
 match_extend_back_loop_encodeBetterBlockAsm12B:
-	CMPL CX, SI
+	CMPL DX, DI
 	JBE  match_extend_back_end_encodeBetterBlockAsm12B
-	MOVB -1(DX)(BX*1), DI
-	MOVB -1(DX)(CX*1), R8
-	CMPB DI, R8
+	MOVB -1(BX)(SI*1), R8
+	MOVB -1(BX)(DX*1), R9
+	CMPB R8, R9
 	JNE  match_extend_back_end_encodeBetterBlockAsm12B
-	LEAL -1(CX), CX
-	DECL BX
+	LEAL -1(DX), DX
+	DECL SI
 	JZ   match_extend_back_end_encodeBetterBlockAsm12B
 	JMP  match_extend_back_loop_encodeBetterBlockAsm12B
 
 match_extend_back_end_encodeBetterBlockAsm12B:
-	MOVL CX, SI
-	SUBL 12(SP), SI
-	LEAQ 3(AX)(SI*1), SI
-	CMPQ SI, (SP)
+	MOVL DX, DI
+	SUBL 12(SP), DI
+	LEAQ 3(CX)(DI*1), DI
+	CMPQ DI, (SP)
 	JB   match_dst_size_check_encodeBetterBlockAsm12B
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 match_dst_size_check_encodeBetterBlockAsm12B:
-	MOVL CX, SI
-	ADDL $0x04, CX
-	ADDL $0x04, BX
-	MOVQ src_len+32(FP), DI
-	SUBL CX, DI
-	LEAQ (DX)(CX*1), R8
-	LEAQ (DX)(BX*1), R9
+	MOVL DX, DI
+	ADDL $0x04, DX
+	ADDL $0x04, SI
+	MOVQ src_len+32(FP), R8
+	SUBL DX, R8
+	LEAQ (BX)(DX*1), R9
+	LEAQ (BX)(SI*1), R10
 
 	// matchLen
-	XORL R11, R11
+	XORL R12, R12
 
 matchlen_loopback_16_match_nolit_encodeBetterBlockAsm12B:
-	CMPL DI, $0x10
+	CMPL R8, $0x10
 	JB   matchlen_match8_match_nolit_encodeBetterBlockAsm12B
-	MOVQ (R8)(R11*1), R10
-	MOVQ 8(R8)(R11*1), R12
-	XORQ (R9)(R11*1), R10
+	MOVQ (R9)(R12*1), R11
+	MOVQ 8(R9)(R12*1), R13
+	XORQ (R10)(R12*1), R11
 	JNZ  matchlen_bsf_8_match_nolit_encodeBetterBlockAsm12B
-	XORQ 8(R9)(R11*1), R12
+	XORQ 8(R10)(R12*1), R13
 	JNZ  matchlen_bsf_16match_nolit_encodeBetterBlockAsm12B
-	LEAL -16(DI), DI
-	LEAL 16(R11), R11
+	LEAL -16(R8), R8
+	LEAL 16(R12), R12
 	JMP  matchlen_loopback_16_match_nolit_encodeBetterBlockAsm12B
 
 matchlen_bsf_16match_nolit_encodeBetterBlockAsm12B:
 #ifdef GOAMD64_v3
-	TZCNTQ R12, R12
+	TZCNTQ R13, R13
 
 #else
-	BSFQ R12, R12
+	BSFQ R13, R13
 
 #endif
-	SARQ $0x03, R12
-	LEAL 8(R11)(R12*1), R11
+	SARQ $0x03, R13
+	LEAL 8(R12)(R13*1), R12
 	JMP  match_nolit_end_encodeBetterBlockAsm12B
 
 matchlen_match8_match_nolit_encodeBetterBlockAsm12B:
-	CMPL DI, $0x08
+	CMPL R8, $0x08
 	JB   matchlen_match4_match_nolit_encodeBetterBlockAsm12B
-	MOVQ (R8)(R11*1), R10
-	XORQ (R9)(R11*1), R10
+	MOVQ (R9)(R12*1), R11
+	XORQ (R10)(R12*1), R11
 	JNZ  matchlen_bsf_8_match_nolit_encodeBetterBlockAsm12B
-	LEAL -8(DI), DI
-	LEAL 8(R11), R11
+	LEAL -8(R8), R8
+	LEAL 8(R12), R12
 	JMP  matchlen_match4_match_nolit_encodeBetterBlockAsm12B
 
 matchlen_bsf_8_match_nolit_encodeBetterBlockAsm12B:
 #ifdef GOAMD64_v3
-	TZCNTQ R10, R10
+	TZCNTQ R11, R11
 
 #else
-	BSFQ R10, R10
+	BSFQ R11, R11
 
 #endif
-	SARQ $0x03, R10
-	LEAL (R11)(R10*1), R11
+	SARQ $0x03, R11
+	LEAL (R12)(R11*1), R12
 	JMP  match_nolit_end_encodeBetterBlockAsm12B
 
 matchlen_match4_match_nolit_encodeBetterBlockAsm12B:
-	CMPL DI, $0x04
+	CMPL R8, $0x04
 	JB   matchlen_match2_match_nolit_encodeBetterBlockAsm12B
-	MOVL (R8)(R11*1), R10
-	CMPL (R9)(R11*1), R10
+	MOVL (R9)(R12*1), R11
+	CMPL (R10)(R12*1), R11
 	JNE  matchlen_match2_match_nolit_encodeBetterBlockAsm12B
-	LEAL -4(DI), DI
-	LEAL 4(R11), R11
+	LEAL -4(R8), R8
+	LEAL 4(R12), R12
 
 matchlen_match2_match_nolit_encodeBetterBlockAsm12B:
-	CMPL DI, $0x01
+	CMPL R8, $0x01
 	JE   matchlen_match1_match_nolit_encodeBetterBlockAsm12B
 	JB   match_nolit_end_encodeBetterBlockAsm12B
-	MOVW (R8)(R11*1), R10
-	CMPW (R9)(R11*1), R10
+	MOVW (R9)(R12*1), R11
+	CMPW (R10)(R12*1), R11
 	JNE  matchlen_match1_match_nolit_encodeBetterBlockAsm12B
-	LEAL 2(R11), R11
-	SUBL $0x02, DI
+	LEAL 2(R12), R12
+	SUBL $0x02, R8
 	JZ   match_nolit_end_encodeBetterBlockAsm12B
 
 matchlen_match1_match_nolit_encodeBetterBlockAsm12B:
-	MOVB (R8)(R11*1), R10
-	CMPB (R9)(R11*1), R10
+	MOVB (R9)(R12*1), R11
+	CMPB (R10)(R12*1), R11
 	JNE  match_nolit_end_encodeBetterBlockAsm12B
-	LEAL 1(R11), R11
+	LEAL 1(R12), R12
 
 match_nolit_end_encodeBetterBlockAsm12B:
-	MOVL CX, DI
-	SUBL BX, DI
+	MOVL DX, R8
+	SUBL SI, R8
 
 	// Check if repeat
-	CMPL 16(SP), DI
+	CMPL 16(SP), R8
 	JEQ  match_is_repeat_encodeBetterBlockAsm12B
-	MOVL DI, 16(SP)
-	MOVL 12(SP), BX
-	CMPL BX, SI
+	MOVL R8, 16(SP)
+	MOVL 12(SP), SI
+	CMPL SI, DI
 	JEQ  emit_literal_done_match_emit_encodeBetterBlockAsm12B
-	MOVL SI, R8
-	MOVL SI, 12(SP)
-	LEAQ (DX)(BX*1), R9
-	SUBL BX, R8
-	LEAL -1(R8), BX
-	CMPL BX, $0x3c
+	MOVL DI, R9
+	MOVL DI, 12(SP)
+	LEAQ (BX)(SI*1), R10
+	SUBL SI, R9
+	LEAL -1(R9), SI
+	CMPL SI, $0x3c
 	JB   one_byte_match_emit_encodeBetterBlockAsm12B
-	CMPL BX, $0x00000100
+	CMPL SI, $0x00000100
 	JB   two_bytes_match_emit_encodeBetterBlockAsm12B
 	JB   three_bytes_match_emit_encodeBetterBlockAsm12B
 
 three_bytes_match_emit_encodeBetterBlockAsm12B:
-	MOVB $0xf4, (AX)
-	MOVW BX, 1(AX)
-	ADDQ $0x03, AX
+	MOVB $0xf4, (CX)
+	MOVW SI, 1(CX)
+	ADDQ $0x03, CX
 	JMP  memmove_long_match_emit_encodeBetterBlockAsm12B
 
 two_bytes_match_emit_encodeBetterBlockAsm12B:
-	MOVB $0xf0, (AX)
-	MOVB BL, 1(AX)
-	ADDQ $0x02, AX
-	CMPL BX, $0x40
+	MOVB $0xf0, (CX)
+	MOVB SI, 1(CX)
+	ADDQ $0x02, CX
+	CMPL SI, $0x40
 	JB   memmove_match_emit_encodeBetterBlockAsm12B
 	JMP  memmove_long_match_emit_encodeBetterBlockAsm12B
 
 one_byte_match_emit_encodeBetterBlockAsm12B:
-	SHLB $0x02, BL
-	MOVB BL, (AX)
-	ADDQ $0x01, AX
+	SHLB $0x02, SI
+	MOVB SI, (CX)
+	ADDQ $0x01, CX
 
 memmove_match_emit_encodeBetterBlockAsm12B:
-	LEAQ (AX)(R8*1), BX
+	LEAQ (CX)(R9*1), SI
 
 	// genMemMoveShort
-	CMPQ R8, $0x04
+	CMPQ R9, $0x04
 	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4
-	CMPQ R8, $0x08
+	CMPQ R9, $0x08
 	JB   emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4through7
-	CMPQ R8, $0x10
+	CMPQ R9, $0x10
 	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_8through16
-	CMPQ R8, $0x20
+	CMPQ R9, $0x20
 	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_17through32
 	JMP  emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_33through64
 
 emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4:
-	MOVL (R9), R10
-	MOVL R10, (AX)
+	MOVL (R10), R11
+	MOVL R11, (CX)
 	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm12B
 
 emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_4through7:
-	MOVL (R9), R10
-	MOVL -4(R9)(R8*1), R9
-	MOVL R10, (AX)
-	MOVL R9, -4(AX)(R8*1)
+	MOVL (R10), R11
+	MOVL -4(R10)(R9*1), R10
+	MOVL R11, (CX)
+	MOVL R10, -4(CX)(R9*1)
 	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm12B
 
 emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_8through16:
-	MOVQ (R9), R10
-	MOVQ -8(R9)(R8*1), R9
-	MOVQ R10, (AX)
-	MOVQ R9, -8(AX)(R8*1)
+	MOVQ (R10), R11
+	MOVQ -8(R10)(R9*1), R10
+	MOVQ R11, (CX)
+	MOVQ R10, -8(CX)(R9*1)
 	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm12B
 
 emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_17through32:
-	MOVOU (R9), X0
-	MOVOU -16(R9)(R8*1), X1
-	MOVOU X0, (AX)
-	MOVOU X1, -16(AX)(R8*1)
+	MOVOU (R10), X0
+	MOVOU -16(R10)(R9*1), X1
+	MOVOU X0, (CX)
+	MOVOU X1, -16(CX)(R9*1)
 	JMP   memmove_end_copy_match_emit_encodeBetterBlockAsm12B
 
 emit_lit_memmove_match_emit_encodeBetterBlockAsm12B_memmove_move_33through64:
-	MOVOU (R9), X0
-	MOVOU 16(R9), X1
-	MOVOU -32(R9)(R8*1), X2
-	MOVOU -16(R9)(R8*1), X3
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(R8*1)
-	MOVOU X3, -16(AX)(R8*1)
+	MOVOU (R10), X0
+	MOVOU 16(R10), X1
+	MOVOU -32(R10)(R9*1), X2
+	MOVOU -16(R10)(R9*1), X3
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R9*1)
+	MOVOU X3, -16(CX)(R9*1)
 
 memmove_end_copy_match_emit_encodeBetterBlockAsm12B:
-	MOVQ BX, AX
+	MOVQ SI, CX
 	JMP  emit_literal_done_match_emit_encodeBetterBlockAsm12B
 
 memmove_long_match_emit_encodeBetterBlockAsm12B:
-	LEAQ (AX)(R8*1), BX
+	LEAQ (CX)(R9*1), SI
 
 	// genMemMoveLong
-	MOVOU (R9), X0
-	MOVOU 16(R9), X1
-	MOVOU -32(R9)(R8*1), X2
-	MOVOU -16(R9)(R8*1), X3
-	MOVQ  R8, R12
-	SHRQ  $0x05, R12
-	MOVQ  AX, R10
-	ANDL  $0x0000001f, R10
-	MOVQ  $0x00000040, R13
-	SUBQ  R10, R13
-	DECQ  R12
+	MOVOU (R10), X0
+	MOVOU 16(R10), X1
+	MOVOU -32(R10)(R9*1), X2
+	MOVOU -16(R10)(R9*1), X3
+	MOVQ  R9, R13
+	SHRQ  $0x05, R13
+	MOVQ  CX, R11
+	ANDL  $0x0000001f, R11
+	MOVQ  $0x00000040, R14
+	SUBQ  R11, R14
+	DECQ  R13
 	JA    emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_forward_sse_loop_32
-	LEAQ  -32(R9)(R13*1), R10
-	LEAQ  -32(AX)(R13*1), R14
+	LEAQ  -32(R10)(R14*1), R11
+	LEAQ  -32(CX)(R14*1), R15
 
 emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_big_loop_back:
-	MOVOU (R10), X4
-	MOVOU 16(R10), X5
-	MOVOA X4, (R14)
-	MOVOA X5, 16(R14)
+	MOVOU (R11), X4
+	MOVOU 16(R11), X5
+	MOVOA X4, (R15)
+	MOVOA X5, 16(R15)
+	ADDQ  $0x20, R15
+	ADDQ  $0x20, R11
 	ADDQ  $0x20, R14
-	ADDQ  $0x20, R10
-	ADDQ  $0x20, R13
-	DECQ  R12
+	DECQ  R13
 	JNA   emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_big_loop_back
 
 emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_forward_sse_loop_32:
-	MOVOU -32(R9)(R13*1), X4
-	MOVOU -16(R9)(R13*1), X5
-	MOVOA X4, -32(AX)(R13*1)
-	MOVOA X5, -16(AX)(R13*1)
-	ADDQ  $0x20, R13
-	CMPQ  R8, R13
+	MOVOU -32(R10)(R14*1), X4
+	MOVOU -16(R10)(R14*1), X5
+	MOVOA X4, -32(CX)(R14*1)
+	MOVOA X5, -16(CX)(R14*1)
+	ADDQ  $0x20, R14
+	CMPQ  R9, R14
 	JAE   emit_lit_memmove_long_match_emit_encodeBetterBlockAsm12Blarge_forward_sse_loop_32
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(R8*1)
-	MOVOU X3, -16(AX)(R8*1)
-	MOVQ  BX, AX
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R9*1)
+	MOVOU X3, -16(CX)(R9*1)
+	MOVQ  SI, CX
 
 emit_literal_done_match_emit_encodeBetterBlockAsm12B:
-	ADDL R11, CX
-	ADDL $0x04, R11
-	MOVL CX, 12(SP)
+	ADDL R12, DX
+	ADDL $0x04, R12
+	MOVL DX, 12(SP)
 
 	// emitCopy
-	CMPL R11, $0x40
+	CMPL R12, $0x40
 	JBE  two_byte_offset_short_match_nolit_encodeBetterBlockAsm12B
-	CMPL DI, $0x00000800
+	CMPL R8, $0x00000800
 	JAE  long_offset_short_match_nolit_encodeBetterBlockAsm12B
-	MOVL $0x00000001, BX
-	LEAL 16(BX), BX
-	MOVB DI, 1(AX)
-	SHRL $0x08, DI
-	SHLL $0x05, DI
-	ORL  DI, BX
-	MOVB BL, (AX)
-	ADDQ $0x02, AX
-	SUBL $0x08, R11
+	MOVL $0x00000001, SI
+	LEAL 16(SI), SI
+	MOVB R8, 1(CX)
+	SHRL $0x08, R8
+	SHLL $0x05, R8
+	ORL  R8, SI
+	MOVB SI, (CX)
+	ADDQ $0x02, CX
+	SUBL $0x08, R12
 
 	// emitRepeat
-	LEAL -4(R11), R11
+	LEAL -4(R12), R12
 	JMP  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b
-	MOVL R11, BX
-	LEAL -4(R11), R11
-	CMPL BX, $0x08
+	MOVL R12, SI
+	LEAL -4(R12), R12
+	CMPL SI, $0x08
 	JBE  repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b
-	CMPL BX, $0x0c
+	CMPL SI, $0x0c
 	JAE  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b
-	CMPL DI, $0x00000800
+	CMPL R8, $0x00000800
 	JB   repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b
 
 cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b:
-	CMPL R11, $0x00000104
+	CMPL R12, $0x00000104
 	JB   repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b
-	LEAL -256(R11), R11
-	MOVW $0x0019, (AX)
-	MOVW R11, 2(AX)
-	ADDQ $0x04, AX
+	LEAL -256(R12), R12
+	MOVW $0x0019, (CX)
+	MOVW R12, 2(CX)
+	ADDQ $0x04, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm12B
 
 repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b:
-	LEAL -4(R11), R11
-	MOVW $0x0015, (AX)
-	MOVB R11, 2(AX)
-	ADDQ $0x03, AX
+	LEAL -4(R12), R12
+	MOVW $0x0015, (CX)
+	MOVB R12, 2(CX)
+	ADDQ $0x03, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm12B
 
 repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b:
-	SHLL $0x02, R11
-	ORL  $0x01, R11
-	MOVW R11, (AX)
-	ADDQ $0x02, AX
+	SHLL $0x02, R12
+	ORL  $0x01, R12
+	MOVW R12, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm12B
 
 repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short_2b:
-	XORQ BX, BX
-	LEAL 1(BX)(R11*4), R11
-	MOVB DI, 1(AX)
-	SARL $0x08, DI
-	SHLL $0x05, DI
-	ORL  DI, R11
-	MOVB R11, (AX)
-	ADDQ $0x02, AX
+	XORQ SI, SI
+	LEAL 1(SI)(R12*4), R12
+	MOVB R8, 1(CX)
+	SARL $0x08, R8
+	SHLL $0x05, R8
+	ORL  R8, R12
+	MOVB R12, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm12B
 
 long_offset_short_match_nolit_encodeBetterBlockAsm12B:
-	MOVB $0xee, (AX)
-	MOVW DI, 1(AX)
-	LEAL -60(R11), R11
-	ADDQ $0x03, AX
+	MOVB $0xee, (CX)
+	MOVW R8, 1(CX)
+	LEAL -60(R12), R12
+	ADDQ $0x03, CX
 
 	// emitRepeat
-	MOVL R11, BX
-	LEAL -4(R11), R11
-	CMPL BX, $0x08
+	MOVL R12, SI
+	LEAL -4(R12), R12
+	CMPL SI, $0x08
 	JBE  repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short
-	CMPL BX, $0x0c
+	CMPL SI, $0x0c
 	JAE  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short
-	CMPL DI, $0x00000800
+	CMPL R8, $0x00000800
 	JB   repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short
 
 cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short:
-	CMPL R11, $0x00000104
+	CMPL R12, $0x00000104
 	JB   repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short
-	LEAL -256(R11), R11
-	MOVW $0x0019, (AX)
-	MOVW R11, 2(AX)
-	ADDQ $0x04, AX
+	LEAL -256(R12), R12
+	MOVW $0x0019, (CX)
+	MOVW R12, 2(CX)
+	ADDQ $0x04, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm12B
 
 repeat_three_match_nolit_encodeBetterBlockAsm12B_emit_copy_short:
-	LEAL -4(R11), R11
-	MOVW $0x0015, (AX)
-	MOVB R11, 2(AX)
-	ADDQ $0x03, AX
+	LEAL -4(R12), R12
+	MOVW $0x0015, (CX)
+	MOVB R12, 2(CX)
+	ADDQ $0x03, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm12B
 
 repeat_two_match_nolit_encodeBetterBlockAsm12B_emit_copy_short:
-	SHLL $0x02, R11
-	ORL  $0x01, R11
-	MOVW R11, (AX)
-	ADDQ $0x02, AX
+	SHLL $0x02, R12
+	ORL  $0x01, R12
+	MOVW R12, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm12B
 
 repeat_two_offset_match_nolit_encodeBetterBlockAsm12B_emit_copy_short:
-	XORQ BX, BX
-	LEAL 1(BX)(R11*4), R11
-	MOVB DI, 1(AX)
-	SARL $0x08, DI
-	SHLL $0x05, DI
-	ORL  DI, R11
-	MOVB R11, (AX)
-	ADDQ $0x02, AX
+	XORQ SI, SI
+	LEAL 1(SI)(R12*4), R12
+	MOVB R8, 1(CX)
+	SARL $0x08, R8
+	SHLL $0x05, R8
+	ORL  R8, R12
+	MOVB R12, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm12B
 
 two_byte_offset_short_match_nolit_encodeBetterBlockAsm12B:
-	MOVL R11, BX
-	SHLL $0x02, BX
-	CMPL R11, $0x0c
+	MOVL R12, SI
+	SHLL $0x02, SI
+	CMPL R12, $0x0c
 	JAE  emit_copy_three_match_nolit_encodeBetterBlockAsm12B
-	CMPL DI, $0x00000800
+	CMPL R8, $0x00000800
 	JAE  emit_copy_three_match_nolit_encodeBetterBlockAsm12B
-	LEAL -15(BX), BX
-	MOVB DI, 1(AX)
-	SHRL $0x08, DI
-	SHLL $0x05, DI
-	ORL  DI, BX
-	MOVB BL, (AX)
-	ADDQ $0x02, AX
+	LEAL -15(SI), SI
+	MOVB R8, 1(CX)
+	SHRL $0x08, R8
+	SHLL $0x05, R8
+	ORL  R8, SI
+	MOVB SI, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm12B
 
 emit_copy_three_match_nolit_encodeBetterBlockAsm12B:
-	LEAL -2(BX), BX
-	MOVB BL, (AX)
-	MOVW DI, 1(AX)
-	ADDQ $0x03, AX
+	LEAL -2(SI), SI
+	MOVB SI, (CX)
+	MOVW R8, 1(CX)
+	ADDQ $0x03, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm12B
 
 match_is_repeat_encodeBetterBlockAsm12B:
-	MOVL 12(SP), BX
-	CMPL BX, SI
+	MOVL 12(SP), SI
+	CMPL SI, DI
 	JEQ  emit_literal_done_match_emit_repeat_encodeBetterBlockAsm12B
-	MOVL SI, R8
-	MOVL SI, 12(SP)
-	LEAQ (DX)(BX*1), R9
-	SUBL BX, R8
-	LEAL -1(R8), BX
-	CMPL BX, $0x3c
+	MOVL DI, R9
+	MOVL DI, 12(SP)
+	LEAQ (BX)(SI*1), R10
+	SUBL SI, R9
+	LEAL -1(R9), SI
+	CMPL SI, $0x3c
 	JB   one_byte_match_emit_repeat_encodeBetterBlockAsm12B
-	CMPL BX, $0x00000100
+	CMPL SI, $0x00000100
 	JB   two_bytes_match_emit_repeat_encodeBetterBlockAsm12B
 	JB   three_bytes_match_emit_repeat_encodeBetterBlockAsm12B
 
 three_bytes_match_emit_repeat_encodeBetterBlockAsm12B:
-	MOVB $0xf4, (AX)
-	MOVW BX, 1(AX)
-	ADDQ $0x03, AX
+	MOVB $0xf4, (CX)
+	MOVW SI, 1(CX)
+	ADDQ $0x03, CX
 	JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm12B
 
 two_bytes_match_emit_repeat_encodeBetterBlockAsm12B:
-	MOVB $0xf0, (AX)
-	MOVB BL, 1(AX)
-	ADDQ $0x02, AX
-	CMPL BX, $0x40
+	MOVB $0xf0, (CX)
+	MOVB SI, 1(CX)
+	ADDQ $0x02, CX
+	CMPL SI, $0x40
 	JB   memmove_match_emit_repeat_encodeBetterBlockAsm12B
 	JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm12B
 
 one_byte_match_emit_repeat_encodeBetterBlockAsm12B:
-	SHLB $0x02, BL
-	MOVB BL, (AX)
-	ADDQ $0x01, AX
+	SHLB $0x02, SI
+	MOVB SI, (CX)
+	ADDQ $0x01, CX
 
 memmove_match_emit_repeat_encodeBetterBlockAsm12B:
-	LEAQ (AX)(R8*1), BX
+	LEAQ (CX)(R9*1), SI
 
 	// genMemMoveShort
-	CMPQ R8, $0x04
+	CMPQ R9, $0x04
 	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4
-	CMPQ R8, $0x08
+	CMPQ R9, $0x08
 	JB   emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4through7
-	CMPQ R8, $0x10
+	CMPQ R9, $0x10
 	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_8through16
-	CMPQ R8, $0x20
+	CMPQ R9, $0x20
 	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_17through32
 	JMP  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_33through64
 
 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4:
-	MOVL (R9), R10
-	MOVL R10, (AX)
+	MOVL (R10), R11
+	MOVL R11, (CX)
 	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B
 
 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_4through7:
-	MOVL (R9), R10
-	MOVL -4(R9)(R8*1), R9
-	MOVL R10, (AX)
-	MOVL R9, -4(AX)(R8*1)
+	MOVL (R10), R11
+	MOVL -4(R10)(R9*1), R10
+	MOVL R11, (CX)
+	MOVL R10, -4(CX)(R9*1)
 	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B
 
 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_8through16:
-	MOVQ (R9), R10
-	MOVQ -8(R9)(R8*1), R9
-	MOVQ R10, (AX)
-	MOVQ R9, -8(AX)(R8*1)
+	MOVQ (R10), R11
+	MOVQ -8(R10)(R9*1), R10
+	MOVQ R11, (CX)
+	MOVQ R10, -8(CX)(R9*1)
 	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B
 
 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_17through32:
-	MOVOU (R9), X0
-	MOVOU -16(R9)(R8*1), X1
-	MOVOU X0, (AX)
-	MOVOU X1, -16(AX)(R8*1)
+	MOVOU (R10), X0
+	MOVOU -16(R10)(R9*1), X1
+	MOVOU X0, (CX)
+	MOVOU X1, -16(CX)(R9*1)
 	JMP   memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B
 
 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm12B_memmove_move_33through64:
-	MOVOU (R9), X0
-	MOVOU 16(R9), X1
-	MOVOU -32(R9)(R8*1), X2
-	MOVOU -16(R9)(R8*1), X3
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(R8*1)
-	MOVOU X3, -16(AX)(R8*1)
+	MOVOU (R10), X0
+	MOVOU 16(R10), X1
+	MOVOU -32(R10)(R9*1), X2
+	MOVOU -16(R10)(R9*1), X3
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R9*1)
+	MOVOU X3, -16(CX)(R9*1)
 
 memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm12B:
-	MOVQ BX, AX
+	MOVQ SI, CX
 	JMP  emit_literal_done_match_emit_repeat_encodeBetterBlockAsm12B
 
 memmove_long_match_emit_repeat_encodeBetterBlockAsm12B:
-	LEAQ (AX)(R8*1), BX
+	LEAQ (CX)(R9*1), SI
 
 	// genMemMoveLong
-	MOVOU (R9), X0
-	MOVOU 16(R9), X1
-	MOVOU -32(R9)(R8*1), X2
-	MOVOU -16(R9)(R8*1), X3
-	MOVQ  R8, R12
-	SHRQ  $0x05, R12
-	MOVQ  AX, R10
-	ANDL  $0x0000001f, R10
-	MOVQ  $0x00000040, R13
-	SUBQ  R10, R13
-	DECQ  R12
+	MOVOU (R10), X0
+	MOVOU 16(R10), X1
+	MOVOU -32(R10)(R9*1), X2
+	MOVOU -16(R10)(R9*1), X3
+	MOVQ  R9, R13
+	SHRQ  $0x05, R13
+	MOVQ  CX, R11
+	ANDL  $0x0000001f, R11
+	MOVQ  $0x00000040, R14
+	SUBQ  R11, R14
+	DECQ  R13
 	JA    emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_forward_sse_loop_32
-	LEAQ  -32(R9)(R13*1), R10
-	LEAQ  -32(AX)(R13*1), R14
+	LEAQ  -32(R10)(R14*1), R11
+	LEAQ  -32(CX)(R14*1), R15
 
 emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_big_loop_back:
-	MOVOU (R10), X4
-	MOVOU 16(R10), X5
-	MOVOA X4, (R14)
-	MOVOA X5, 16(R14)
+	MOVOU (R11), X4
+	MOVOU 16(R11), X5
+	MOVOA X4, (R15)
+	MOVOA X5, 16(R15)
+	ADDQ  $0x20, R15
+	ADDQ  $0x20, R11
 	ADDQ  $0x20, R14
-	ADDQ  $0x20, R10
-	ADDQ  $0x20, R13
-	DECQ  R12
+	DECQ  R13
 	JNA   emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_big_loop_back
 
 emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_forward_sse_loop_32:
-	MOVOU -32(R9)(R13*1), X4
-	MOVOU -16(R9)(R13*1), X5
-	MOVOA X4, -32(AX)(R13*1)
-	MOVOA X5, -16(AX)(R13*1)
-	ADDQ  $0x20, R13
-	CMPQ  R8, R13
+	MOVOU -32(R10)(R14*1), X4
+	MOVOU -16(R10)(R14*1), X5
+	MOVOA X4, -32(CX)(R14*1)
+	MOVOA X5, -16(CX)(R14*1)
+	ADDQ  $0x20, R14
+	CMPQ  R9, R14
 	JAE   emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm12Blarge_forward_sse_loop_32
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(R8*1)
-	MOVOU X3, -16(AX)(R8*1)
-	MOVQ  BX, AX
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R9*1)
+	MOVOU X3, -16(CX)(R9*1)
+	MOVQ  SI, CX
 
 emit_literal_done_match_emit_repeat_encodeBetterBlockAsm12B:
-	ADDL R11, CX
-	ADDL $0x04, R11
-	MOVL CX, 12(SP)
+	ADDL R12, DX
+	ADDL $0x04, R12
+	MOVL DX, 12(SP)
 
 	// emitRepeat
-	MOVL R11, BX
-	LEAL -4(R11), R11
-	CMPL BX, $0x08
+	MOVL R12, SI
+	LEAL -4(R12), R12
+	CMPL SI, $0x08
 	JBE  repeat_two_match_nolit_repeat_encodeBetterBlockAsm12B
-	CMPL BX, $0x0c
+	CMPL SI, $0x0c
 	JAE  cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B
-	CMPL DI, $0x00000800
+	CMPL R8, $0x00000800
 	JB   repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B
 
 cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B:
-	CMPL R11, $0x00000104
+	CMPL R12, $0x00000104
 	JB   repeat_three_match_nolit_repeat_encodeBetterBlockAsm12B
-	LEAL -256(R11), R11
-	MOVW $0x0019, (AX)
-	MOVW R11, 2(AX)
-	ADDQ $0x04, AX
+	LEAL -256(R12), R12
+	MOVW $0x0019, (CX)
+	MOVW R12, 2(CX)
+	ADDQ $0x04, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm12B
 
 repeat_three_match_nolit_repeat_encodeBetterBlockAsm12B:
-	LEAL -4(R11), R11
-	MOVW $0x0015, (AX)
-	MOVB R11, 2(AX)
-	ADDQ $0x03, AX
+	LEAL -4(R12), R12
+	MOVW $0x0015, (CX)
+	MOVB R12, 2(CX)
+	ADDQ $0x03, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm12B
 
 repeat_two_match_nolit_repeat_encodeBetterBlockAsm12B:
-	SHLL $0x02, R11
-	ORL  $0x01, R11
-	MOVW R11, (AX)
-	ADDQ $0x02, AX
+	SHLL $0x02, R12
+	ORL  $0x01, R12
+	MOVW R12, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm12B
 
 repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm12B:
-	XORQ BX, BX
-	LEAL 1(BX)(R11*4), R11
-	MOVB DI, 1(AX)
-	SARL $0x08, DI
-	SHLL $0x05, DI
-	ORL  DI, R11
-	MOVB R11, (AX)
-	ADDQ $0x02, AX
+	XORQ SI, SI
+	LEAL 1(SI)(R12*4), R12
+	MOVB R8, 1(CX)
+	SARL $0x08, R8
+	SHLL $0x05, R8
+	ORL  R8, R12
+	MOVB R12, (CX)
+	ADDQ $0x02, CX
 
 match_nolit_emitcopy_end_encodeBetterBlockAsm12B:
-	CMPL CX, 8(SP)
+	CMPL DX, 8(SP)
 	JAE  emit_remainder_encodeBetterBlockAsm12B
-	CMPQ AX, (SP)
+	CMPQ CX, (SP)
 	JB   match_nolit_dst_ok_encodeBetterBlockAsm12B
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 match_nolit_dst_ok_encodeBetterBlockAsm12B:
-	MOVQ  $0x0000cf1bbcdcbf9b, BX
-	MOVQ  $0x9e3779b1, DI
-	LEAQ  1(SI), SI
-	LEAQ  -2(CX), R8
-	MOVQ  (DX)(SI*1), R9
-	MOVQ  1(DX)(SI*1), R10
-	MOVQ  (DX)(R8*1), R11
-	MOVQ  1(DX)(R8*1), R12
-	SHLQ  $0x10, R9
-	IMULQ BX, R9
-	SHRQ  $0x32, R9
-	SHLQ  $0x20, R10
-	IMULQ DI, R10
-	SHRQ  $0x34, R10
-	SHLQ  $0x10, R11
-	IMULQ BX, R11
-	SHRQ  $0x32, R11
-	SHLQ  $0x20, R12
-	IMULQ DI, R12
-	SHRQ  $0x34, R12
-	LEAQ  1(SI), DI
-	LEAQ  1(R8), R13
-	MOVL  SI, 24(SP)(R9*4)
-	MOVL  R8, 24(SP)(R11*4)
-	MOVL  DI, 65560(SP)(R10*4)
-	MOVL  R13, 65560(SP)(R12*4)
-	LEAQ  1(R8)(SI*1), DI
-	SHRQ  $0x01, DI
-	ADDQ  $0x01, SI
-	SUBQ  $0x01, R8
+	MOVQ  $0x0000cf1bbcdcbf9b, SI
+	MOVQ  $0x9e3779b1, R8
+	LEAQ  1(DI), DI
+	LEAQ  -2(DX), R9
+	MOVQ  (BX)(DI*1), R10
+	MOVQ  1(BX)(DI*1), R11
+	MOVQ  (BX)(R9*1), R12
+	MOVQ  1(BX)(R9*1), R13
+	SHLQ  $0x10, R10
+	IMULQ SI, R10
+	SHRQ  $0x32, R10
+	SHLQ  $0x20, R11
+	IMULQ R8, R11
+	SHRQ  $0x34, R11
+	SHLQ  $0x10, R12
+	IMULQ SI, R12
+	SHRQ  $0x32, R12
+	SHLQ  $0x20, R13
+	IMULQ R8, R13
+	SHRQ  $0x34, R13
+	LEAQ  1(DI), R8
+	LEAQ  1(R9), R14
+	MOVL  DI, (AX)(R10*4)
+	MOVL  R9, (AX)(R12*4)
+	MOVL  R8, 65536(AX)(R11*4)
+	MOVL  R14, 65536(AX)(R13*4)
+	LEAQ  1(R9)(DI*1), R8
+	SHRQ  $0x01, R8
+	ADDQ  $0x01, DI
+	SUBQ  $0x01, R9
 
 index_loop_encodeBetterBlockAsm12B:
-	CMPQ  DI, R8
+	CMPQ  R8, R9
 	JAE   search_loop_encodeBetterBlockAsm12B
-	MOVQ  (DX)(SI*1), R9
-	MOVQ  (DX)(DI*1), R10
-	SHLQ  $0x10, R9
-	IMULQ BX, R9
-	SHRQ  $0x32, R9
+	MOVQ  (BX)(DI*1), R10
+	MOVQ  (BX)(R8*1), R11
 	SHLQ  $0x10, R10
-	IMULQ BX, R10
+	IMULQ SI, R10
 	SHRQ  $0x32, R10
-	MOVL  SI, 24(SP)(R9*4)
-	MOVL  DI, 24(SP)(R10*4)
-	ADDQ  $0x02, SI
+	SHLQ  $0x10, R11
+	IMULQ SI, R11
+	SHRQ  $0x32, R11
+	MOVL  DI, (AX)(R10*4)
+	MOVL  R8, (AX)(R11*4)
 	ADDQ  $0x02, DI
+	ADDQ  $0x02, R8
 	JMP   index_loop_encodeBetterBlockAsm12B
 
 emit_remainder_encodeBetterBlockAsm12B:
-	MOVQ src_len+32(FP), CX
-	SUBL 12(SP), CX
-	LEAQ 3(AX)(CX*1), CX
-	CMPQ CX, (SP)
+	MOVQ src_len+32(FP), AX
+	SUBL 12(SP), AX
+	LEAQ 3(CX)(AX*1), AX
+	CMPQ AX, (SP)
 	JB   emit_remainder_ok_encodeBetterBlockAsm12B
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 emit_remainder_ok_encodeBetterBlockAsm12B:
-	MOVQ src_len+32(FP), CX
-	MOVL 12(SP), BX
-	CMPL BX, CX
+	MOVQ src_len+32(FP), AX
+	MOVL 12(SP), DX
+	CMPL DX, AX
 	JEQ  emit_literal_done_emit_remainder_encodeBetterBlockAsm12B
-	MOVL CX, SI
-	MOVL CX, 12(SP)
-	LEAQ (DX)(BX*1), CX
-	SUBL BX, SI
+	MOVL AX, SI
+	MOVL AX, 12(SP)
+	LEAQ (BX)(DX*1), AX
+	SUBL DX, SI
 	LEAL -1(SI), DX
 	CMPL DX, $0x3c
 	JB   one_byte_emit_remainder_encodeBetterBlockAsm12B
@@ -8848,26 +8856,26 @@ emit_remainder_ok_encodeBetterBlockAsm12B:
 	JB   three_bytes_emit_remainder_encodeBetterBlockAsm12B
 
 three_bytes_emit_remainder_encodeBetterBlockAsm12B:
-	MOVB $0xf4, (AX)
-	MOVW DX, 1(AX)
-	ADDQ $0x03, AX
+	MOVB $0xf4, (CX)
+	MOVW DX, 1(CX)
+	ADDQ $0x03, CX
 	JMP  memmove_long_emit_remainder_encodeBetterBlockAsm12B
 
 two_bytes_emit_remainder_encodeBetterBlockAsm12B:
-	MOVB $0xf0, (AX)
-	MOVB DL, 1(AX)
-	ADDQ $0x02, AX
+	MOVB $0xf0, (CX)
+	MOVB DL, 1(CX)
+	ADDQ $0x02, CX
 	CMPL DX, $0x40
 	JB   memmove_emit_remainder_encodeBetterBlockAsm12B
 	JMP  memmove_long_emit_remainder_encodeBetterBlockAsm12B
 
 one_byte_emit_remainder_encodeBetterBlockAsm12B:
 	SHLB $0x02, DL
-	MOVB DL, (AX)
-	ADDQ $0x01, AX
+	MOVB DL, (CX)
+	ADDQ $0x01, CX
 
 memmove_emit_remainder_encodeBetterBlockAsm12B:
-	LEAQ (AX)(SI*1), DX
+	LEAQ (CX)(SI*1), DX
 	MOVL SI, BX
 
 	// genMemMoveShort
@@ -8883,73 +8891,73 @@ memmove_emit_remainder_encodeBetterBlockAsm12B:
 	JMP  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_33through64
 
 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_1or2:
-	MOVB (CX), SI
-	MOVB -1(CX)(BX*1), CL
-	MOVB SI, (AX)
-	MOVB CL, -1(AX)(BX*1)
+	MOVB (AX), SI
+	MOVB -1(AX)(BX*1), AL
+	MOVB SI, (CX)
+	MOVB AL, -1(CX)(BX*1)
 	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B
 
 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_3:
-	MOVW (CX), SI
-	MOVB 2(CX), CL
-	MOVW SI, (AX)
-	MOVB CL, 2(AX)
+	MOVW (AX), SI
+	MOVB 2(AX), AL
+	MOVW SI, (CX)
+	MOVB AL, 2(CX)
 	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B
 
 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_4through7:
-	MOVL (CX), SI
-	MOVL -4(CX)(BX*1), CX
-	MOVL SI, (AX)
-	MOVL CX, -4(AX)(BX*1)
+	MOVL (AX), SI
+	MOVL -4(AX)(BX*1), AX
+	MOVL SI, (CX)
+	MOVL AX, -4(CX)(BX*1)
 	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B
 
 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_8through16:
-	MOVQ (CX), SI
-	MOVQ -8(CX)(BX*1), CX
-	MOVQ SI, (AX)
-	MOVQ CX, -8(AX)(BX*1)
+	MOVQ (AX), SI
+	MOVQ -8(AX)(BX*1), AX
+	MOVQ SI, (CX)
+	MOVQ AX, -8(CX)(BX*1)
 	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B
 
 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_17through32:
-	MOVOU (CX), X0
-	MOVOU -16(CX)(BX*1), X1
-	MOVOU X0, (AX)
-	MOVOU X1, -16(AX)(BX*1)
+	MOVOU (AX), X0
+	MOVOU -16(AX)(BX*1), X1
+	MOVOU X0, (CX)
+	MOVOU X1, -16(CX)(BX*1)
 	JMP   memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B
 
 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm12B_memmove_move_33through64:
-	MOVOU (CX), X0
-	MOVOU 16(CX), X1
-	MOVOU -32(CX)(BX*1), X2
-	MOVOU -16(CX)(BX*1), X3
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(BX*1)
-	MOVOU X3, -16(AX)(BX*1)
+	MOVOU (AX), X0
+	MOVOU 16(AX), X1
+	MOVOU -32(AX)(BX*1), X2
+	MOVOU -16(AX)(BX*1), X3
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(BX*1)
+	MOVOU X3, -16(CX)(BX*1)
 
 memmove_end_copy_emit_remainder_encodeBetterBlockAsm12B:
-	MOVQ DX, AX
+	MOVQ DX, CX
 	JMP  emit_literal_done_emit_remainder_encodeBetterBlockAsm12B
 
 memmove_long_emit_remainder_encodeBetterBlockAsm12B:
-	LEAQ (AX)(SI*1), DX
+	LEAQ (CX)(SI*1), DX
 	MOVL SI, BX
 
 	// genMemMoveLong
-	MOVOU (CX), X0
-	MOVOU 16(CX), X1
-	MOVOU -32(CX)(BX*1), X2
-	MOVOU -16(CX)(BX*1), X3
+	MOVOU (AX), X0
+	MOVOU 16(AX), X1
+	MOVOU -32(AX)(BX*1), X2
+	MOVOU -16(AX)(BX*1), X3
 	MOVQ  BX, DI
 	SHRQ  $0x05, DI
-	MOVQ  AX, SI
+	MOVQ  CX, SI
 	ANDL  $0x0000001f, SI
 	MOVQ  $0x00000040, R8
 	SUBQ  SI, R8
 	DECQ  DI
 	JA    emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_forward_sse_loop_32
-	LEAQ  -32(CX)(R8*1), SI
-	LEAQ  -32(AX)(R8*1), R9
+	LEAQ  -32(AX)(R8*1), SI
+	LEAQ  -32(CX)(R8*1), R9
 
 emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_big_loop_back:
 	MOVOU (SI), X4
@@ -8963,756 +8971,757 @@ emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_big_loop_back:
 	JNA   emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_big_loop_back
 
 emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_forward_sse_loop_32:
-	MOVOU -32(CX)(R8*1), X4
-	MOVOU -16(CX)(R8*1), X5
-	MOVOA X4, -32(AX)(R8*1)
-	MOVOA X5, -16(AX)(R8*1)
+	MOVOU -32(AX)(R8*1), X4
+	MOVOU -16(AX)(R8*1), X5
+	MOVOA X4, -32(CX)(R8*1)
+	MOVOA X5, -16(CX)(R8*1)
 	ADDQ  $0x20, R8
 	CMPQ  BX, R8
-	JAE   emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_forward_sse_loop_32
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(BX*1)
-	MOVOU X3, -16(AX)(BX*1)
-	MOVQ  DX, AX
+	JAE   emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm12Blarge_forward_sse_loop_32
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(BX*1)
+	MOVOU X3, -16(CX)(BX*1)
+	MOVQ  DX, CX
 
 emit_literal_done_emit_remainder_encodeBetterBlockAsm12B:
-	MOVQ dst_base+0(FP), CX
-	SUBQ CX, AX
-	MOVQ AX, ret+48(FP)
+	MOVQ dst_base+0(FP), AX
+	SUBQ AX, CX
+	MOVQ CX, ret+56(FP)
 	RET
 
-// func encodeBetterBlockAsm10B(dst []byte, src []byte) int
+// func encodeBetterBlockAsm10B(dst []byte, src []byte, tmp *[20480]byte) int
 // Requires: BMI, SSE2
-TEXT ·encodeBetterBlockAsm10B(SB), $20504-56
-	MOVQ dst_base+0(FP), AX
-	MOVQ $0x000000a0, CX
-	LEAQ 24(SP), DX
+TEXT ·encodeBetterBlockAsm10B(SB), $24-64
+	MOVQ tmp+48(FP), AX
+	MOVQ dst_base+0(FP), CX
+	MOVQ $0x000000a0, DX
+	MOVQ AX, BX
 	PXOR X0, X0
 
 zero_loop_encodeBetterBlockAsm10B:
-	MOVOU X0, (DX)
-	MOVOU X0, 16(DX)
-	MOVOU X0, 32(DX)
-	MOVOU X0, 48(DX)
-	MOVOU X0, 64(DX)
-	MOVOU X0, 80(DX)
-	MOVOU X0, 96(DX)
-	MOVOU X0, 112(DX)
-	ADDQ  $0x80, DX
-	DECQ  CX
+	MOVOU X0, (BX)
+	MOVOU X0, 16(BX)
+	MOVOU X0, 32(BX)
+	MOVOU X0, 48(BX)
+	MOVOU X0, 64(BX)
+	MOVOU X0, 80(BX)
+	MOVOU X0, 96(BX)
+	MOVOU X0, 112(BX)
+	ADDQ  $0x80, BX
+	DECQ  DX
 	JNZ   zero_loop_encodeBetterBlockAsm10B
 	MOVL  $0x00000000, 12(SP)
-	MOVQ  src_len+32(FP), CX
-	LEAQ  -6(CX), DX
-	LEAQ  -8(CX), BX
-	MOVL  BX, 8(SP)
-	SHRQ  $0x05, CX
-	SUBL  CX, DX
-	LEAQ  (AX)(DX*1), DX
-	MOVQ  DX, (SP)
-	MOVL  $0x00000001, CX
+	MOVQ  src_len+32(FP), DX
+	LEAQ  -6(DX), BX
+	LEAQ  -8(DX), SI
+	MOVL  SI, 8(SP)
+	SHRQ  $0x05, DX
+	SUBL  DX, BX
+	LEAQ  (CX)(BX*1), BX
+	MOVQ  BX, (SP)
+	MOVL  $0x00000001, DX
 	MOVL  $0x00000000, 16(SP)
-	MOVQ  src_base+24(FP), DX
+	MOVQ  src_base+24(FP), BX
 
 search_loop_encodeBetterBlockAsm10B:
-	MOVL  CX, BX
-	SUBL  12(SP), BX
-	SHRL  $0x05, BX
-	LEAL  1(CX)(BX*1), BX
-	CMPL  BX, 8(SP)
+	MOVL  DX, SI
+	SUBL  12(SP), SI
+	SHRL  $0x05, SI
+	LEAL  1(DX)(SI*1), SI
+	CMPL  SI, 8(SP)
 	JAE   emit_remainder_encodeBetterBlockAsm10B
-	MOVQ  (DX)(CX*1), SI
-	MOVL  BX, 20(SP)
-	MOVQ  $0x0000cf1bbcdcbf9b, R8
-	MOVQ  $0x9e3779b1, BX
-	MOVQ  SI, R9
-	MOVQ  SI, R10
-	SHLQ  $0x10, R9
-	IMULQ R8, R9
-	SHRQ  $0x34, R9
-	SHLQ  $0x20, R10
-	IMULQ BX, R10
-	SHRQ  $0x36, R10
-	MOVL  24(SP)(R9*4), BX
-	MOVL  16408(SP)(R10*4), DI
-	MOVL  CX, 24(SP)(R9*4)
-	MOVL  CX, 16408(SP)(R10*4)
-	MOVQ  (DX)(BX*1), R9
-	MOVQ  (DX)(DI*1), R10
-	CMPQ  R9, SI
+	MOVQ  (BX)(DX*1), DI
+	MOVL  SI, 20(SP)
+	MOVQ  $0x0000cf1bbcdcbf9b, R9
+	MOVQ  $0x9e3779b1, SI
+	MOVQ  DI, R10
+	MOVQ  DI, R11
+	SHLQ  $0x10, R10
+	IMULQ R9, R10
+	SHRQ  $0x34, R10
+	SHLQ  $0x20, R11
+	IMULQ SI, R11
+	SHRQ  $0x36, R11
+	MOVL  (AX)(R10*4), SI
+	MOVL  16384(AX)(R11*4), R8
+	MOVL  DX, (AX)(R10*4)
+	MOVL  DX, 16384(AX)(R11*4)
+	MOVQ  (BX)(SI*1), R10
+	MOVQ  (BX)(R8*1), R11
+	CMPQ  R10, DI
 	JEQ   candidate_match_encodeBetterBlockAsm10B
-	CMPQ  R10, SI
+	CMPQ  R11, DI
 	JNE   no_short_found_encodeBetterBlockAsm10B
-	MOVL  DI, BX
+	MOVL  R8, SI
 	JMP   candidate_match_encodeBetterBlockAsm10B
 
 no_short_found_encodeBetterBlockAsm10B:
-	CMPL R9, SI
+	CMPL R10, DI
 	JEQ  candidate_match_encodeBetterBlockAsm10B
-	CMPL R10, SI
+	CMPL R11, DI
 	JEQ  candidateS_match_encodeBetterBlockAsm10B
-	MOVL 20(SP), CX
+	MOVL 20(SP), DX
 	JMP  search_loop_encodeBetterBlockAsm10B
 
 candidateS_match_encodeBetterBlockAsm10B:
-	SHRQ  $0x08, SI
-	MOVQ  SI, R9
-	SHLQ  $0x10, R9
-	IMULQ R8, R9
-	SHRQ  $0x34, R9
-	MOVL  24(SP)(R9*4), BX
-	INCL  CX
-	MOVL  CX, 24(SP)(R9*4)
-	CMPL  (DX)(BX*1), SI
+	SHRQ  $0x08, DI
+	MOVQ  DI, R10
+	SHLQ  $0x10, R10
+	IMULQ R9, R10
+	SHRQ  $0x34, R10
+	MOVL  (AX)(R10*4), SI
+	INCL  DX
+	MOVL  DX, (AX)(R10*4)
+	CMPL  (BX)(SI*1), DI
 	JEQ   candidate_match_encodeBetterBlockAsm10B
-	DECL  CX
-	MOVL  DI, BX
+	DECL  DX
+	MOVL  R8, SI
 
 candidate_match_encodeBetterBlockAsm10B:
-	MOVL  12(SP), SI
-	TESTL BX, BX
+	MOVL  12(SP), DI
+	TESTL SI, SI
 	JZ    match_extend_back_end_encodeBetterBlockAsm10B
 
 match_extend_back_loop_encodeBetterBlockAsm10B:
-	CMPL CX, SI
+	CMPL DX, DI
 	JBE  match_extend_back_end_encodeBetterBlockAsm10B
-	MOVB -1(DX)(BX*1), DI
-	MOVB -1(DX)(CX*1), R8
-	CMPB DI, R8
+	MOVB -1(BX)(SI*1), R8
+	MOVB -1(BX)(DX*1), R9
+	CMPB R8, R9
 	JNE  match_extend_back_end_encodeBetterBlockAsm10B
-	LEAL -1(CX), CX
-	DECL BX
+	LEAL -1(DX), DX
+	DECL SI
 	JZ   match_extend_back_end_encodeBetterBlockAsm10B
 	JMP  match_extend_back_loop_encodeBetterBlockAsm10B
 
 match_extend_back_end_encodeBetterBlockAsm10B:
-	MOVL CX, SI
-	SUBL 12(SP), SI
-	LEAQ 3(AX)(SI*1), SI
-	CMPQ SI, (SP)
+	MOVL DX, DI
+	SUBL 12(SP), DI
+	LEAQ 3(CX)(DI*1), DI
+	CMPQ DI, (SP)
 	JB   match_dst_size_check_encodeBetterBlockAsm10B
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 match_dst_size_check_encodeBetterBlockAsm10B:
-	MOVL CX, SI
-	ADDL $0x04, CX
-	ADDL $0x04, BX
-	MOVQ src_len+32(FP), DI
-	SUBL CX, DI
-	LEAQ (DX)(CX*1), R8
-	LEAQ (DX)(BX*1), R9
+	MOVL DX, DI
+	ADDL $0x04, DX
+	ADDL $0x04, SI
+	MOVQ src_len+32(FP), R8
+	SUBL DX, R8
+	LEAQ (BX)(DX*1), R9
+	LEAQ (BX)(SI*1), R10
 
 	// matchLen
-	XORL R11, R11
+	XORL R12, R12
 
 matchlen_loopback_16_match_nolit_encodeBetterBlockAsm10B:
-	CMPL DI, $0x10
+	CMPL R8, $0x10
 	JB   matchlen_match8_match_nolit_encodeBetterBlockAsm10B
-	MOVQ (R8)(R11*1), R10
-	MOVQ 8(R8)(R11*1), R12
-	XORQ (R9)(R11*1), R10
+	MOVQ (R9)(R12*1), R11
+	MOVQ 8(R9)(R12*1), R13
+	XORQ (R10)(R12*1), R11
 	JNZ  matchlen_bsf_8_match_nolit_encodeBetterBlockAsm10B
-	XORQ 8(R9)(R11*1), R12
+	XORQ 8(R10)(R12*1), R13
 	JNZ  matchlen_bsf_16match_nolit_encodeBetterBlockAsm10B
-	LEAL -16(DI), DI
-	LEAL 16(R11), R11
+	LEAL -16(R8), R8
+	LEAL 16(R12), R12
 	JMP  matchlen_loopback_16_match_nolit_encodeBetterBlockAsm10B
 
 matchlen_bsf_16match_nolit_encodeBetterBlockAsm10B:
 #ifdef GOAMD64_v3
-	TZCNTQ R12, R12
+	TZCNTQ R13, R13
 
 #else
-	BSFQ R12, R12
+	BSFQ R13, R13
 
 #endif
-	SARQ $0x03, R12
-	LEAL 8(R11)(R12*1), R11
+	SARQ $0x03, R13
+	LEAL 8(R12)(R13*1), R12
 	JMP  match_nolit_end_encodeBetterBlockAsm10B
 
 matchlen_match8_match_nolit_encodeBetterBlockAsm10B:
-	CMPL DI, $0x08
+	CMPL R8, $0x08
 	JB   matchlen_match4_match_nolit_encodeBetterBlockAsm10B
-	MOVQ (R8)(R11*1), R10
-	XORQ (R9)(R11*1), R10
+	MOVQ (R9)(R12*1), R11
+	XORQ (R10)(R12*1), R11
 	JNZ  matchlen_bsf_8_match_nolit_encodeBetterBlockAsm10B
-	LEAL -8(DI), DI
-	LEAL 8(R11), R11
+	LEAL -8(R8), R8
+	LEAL 8(R12), R12
 	JMP  matchlen_match4_match_nolit_encodeBetterBlockAsm10B
 
 matchlen_bsf_8_match_nolit_encodeBetterBlockAsm10B:
 #ifdef GOAMD64_v3
-	TZCNTQ R10, R10
+	TZCNTQ R11, R11
 
 #else
-	BSFQ R10, R10
+	BSFQ R11, R11
 
 #endif
-	SARQ $0x03, R10
-	LEAL (R11)(R10*1), R11
+	SARQ $0x03, R11
+	LEAL (R12)(R11*1), R12
 	JMP  match_nolit_end_encodeBetterBlockAsm10B
 
 matchlen_match4_match_nolit_encodeBetterBlockAsm10B:
-	CMPL DI, $0x04
+	CMPL R8, $0x04
 	JB   matchlen_match2_match_nolit_encodeBetterBlockAsm10B
-	MOVL (R8)(R11*1), R10
-	CMPL (R9)(R11*1), R10
+	MOVL (R9)(R12*1), R11
+	CMPL (R10)(R12*1), R11
 	JNE  matchlen_match2_match_nolit_encodeBetterBlockAsm10B
-	LEAL -4(DI), DI
-	LEAL 4(R11), R11
+	LEAL -4(R8), R8
+	LEAL 4(R12), R12
 
 matchlen_match2_match_nolit_encodeBetterBlockAsm10B:
-	CMPL DI, $0x01
+	CMPL R8, $0x01
 	JE   matchlen_match1_match_nolit_encodeBetterBlockAsm10B
 	JB   match_nolit_end_encodeBetterBlockAsm10B
-	MOVW (R8)(R11*1), R10
-	CMPW (R9)(R11*1), R10
+	MOVW (R9)(R12*1), R11
+	CMPW (R10)(R12*1), R11
 	JNE  matchlen_match1_match_nolit_encodeBetterBlockAsm10B
-	LEAL 2(R11), R11
-	SUBL $0x02, DI
+	LEAL 2(R12), R12
+	SUBL $0x02, R8
 	JZ   match_nolit_end_encodeBetterBlockAsm10B
 
 matchlen_match1_match_nolit_encodeBetterBlockAsm10B:
-	MOVB (R8)(R11*1), R10
-	CMPB (R9)(R11*1), R10
+	MOVB (R9)(R12*1), R11
+	CMPB (R10)(R12*1), R11
 	JNE  match_nolit_end_encodeBetterBlockAsm10B
-	LEAL 1(R11), R11
+	LEAL 1(R12), R12
 
 match_nolit_end_encodeBetterBlockAsm10B:
-	MOVL CX, DI
-	SUBL BX, DI
+	MOVL DX, R8
+	SUBL SI, R8
 
 	// Check if repeat
-	CMPL 16(SP), DI
+	CMPL 16(SP), R8
 	JEQ  match_is_repeat_encodeBetterBlockAsm10B
-	MOVL DI, 16(SP)
-	MOVL 12(SP), BX
-	CMPL BX, SI
+	MOVL R8, 16(SP)
+	MOVL 12(SP), SI
+	CMPL SI, DI
 	JEQ  emit_literal_done_match_emit_encodeBetterBlockAsm10B
-	MOVL SI, R8
-	MOVL SI, 12(SP)
-	LEAQ (DX)(BX*1), R9
-	SUBL BX, R8
-	LEAL -1(R8), BX
-	CMPL BX, $0x3c
+	MOVL DI, R9
+	MOVL DI, 12(SP)
+	LEAQ (BX)(SI*1), R10
+	SUBL SI, R9
+	LEAL -1(R9), SI
+	CMPL SI, $0x3c
 	JB   one_byte_match_emit_encodeBetterBlockAsm10B
-	CMPL BX, $0x00000100
+	CMPL SI, $0x00000100
 	JB   two_bytes_match_emit_encodeBetterBlockAsm10B
 	JB   three_bytes_match_emit_encodeBetterBlockAsm10B
 
 three_bytes_match_emit_encodeBetterBlockAsm10B:
-	MOVB $0xf4, (AX)
-	MOVW BX, 1(AX)
-	ADDQ $0x03, AX
+	MOVB $0xf4, (CX)
+	MOVW SI, 1(CX)
+	ADDQ $0x03, CX
 	JMP  memmove_long_match_emit_encodeBetterBlockAsm10B
 
 two_bytes_match_emit_encodeBetterBlockAsm10B:
-	MOVB $0xf0, (AX)
-	MOVB BL, 1(AX)
-	ADDQ $0x02, AX
-	CMPL BX, $0x40
+	MOVB $0xf0, (CX)
+	MOVB SI, 1(CX)
+	ADDQ $0x02, CX
+	CMPL SI, $0x40
 	JB   memmove_match_emit_encodeBetterBlockAsm10B
 	JMP  memmove_long_match_emit_encodeBetterBlockAsm10B
 
 one_byte_match_emit_encodeBetterBlockAsm10B:
-	SHLB $0x02, BL
-	MOVB BL, (AX)
-	ADDQ $0x01, AX
+	SHLB $0x02, SI
+	MOVB SI, (CX)
+	ADDQ $0x01, CX
 
 memmove_match_emit_encodeBetterBlockAsm10B:
-	LEAQ (AX)(R8*1), BX
+	LEAQ (CX)(R9*1), SI
 
 	// genMemMoveShort
-	CMPQ R8, $0x04
+	CMPQ R9, $0x04
 	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4
-	CMPQ R8, $0x08
+	CMPQ R9, $0x08
 	JB   emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4through7
-	CMPQ R8, $0x10
+	CMPQ R9, $0x10
 	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_8through16
-	CMPQ R8, $0x20
+	CMPQ R9, $0x20
 	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_17through32
 	JMP  emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_33through64
 
 emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4:
-	MOVL (R9), R10
-	MOVL R10, (AX)
+	MOVL (R10), R11
+	MOVL R11, (CX)
 	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm10B
 
 emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_4through7:
-	MOVL (R9), R10
-	MOVL -4(R9)(R8*1), R9
-	MOVL R10, (AX)
-	MOVL R9, -4(AX)(R8*1)
+	MOVL (R10), R11
+	MOVL -4(R10)(R9*1), R10
+	MOVL R11, (CX)
+	MOVL R10, -4(CX)(R9*1)
 	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm10B
 
 emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_8through16:
-	MOVQ (R9), R10
-	MOVQ -8(R9)(R8*1), R9
-	MOVQ R10, (AX)
-	MOVQ R9, -8(AX)(R8*1)
+	MOVQ (R10), R11
+	MOVQ -8(R10)(R9*1), R10
+	MOVQ R11, (CX)
+	MOVQ R10, -8(CX)(R9*1)
 	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm10B
 
 emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_17through32:
-	MOVOU (R9), X0
-	MOVOU -16(R9)(R8*1), X1
-	MOVOU X0, (AX)
-	MOVOU X1, -16(AX)(R8*1)
+	MOVOU (R10), X0
+	MOVOU -16(R10)(R9*1), X1
+	MOVOU X0, (CX)
+	MOVOU X1, -16(CX)(R9*1)
 	JMP   memmove_end_copy_match_emit_encodeBetterBlockAsm10B
 
 emit_lit_memmove_match_emit_encodeBetterBlockAsm10B_memmove_move_33through64:
-	MOVOU (R9), X0
-	MOVOU 16(R9), X1
-	MOVOU -32(R9)(R8*1), X2
-	MOVOU -16(R9)(R8*1), X3
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(R8*1)
-	MOVOU X3, -16(AX)(R8*1)
+	MOVOU (R10), X0
+	MOVOU 16(R10), X1
+	MOVOU -32(R10)(R9*1), X2
+	MOVOU -16(R10)(R9*1), X3
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R9*1)
+	MOVOU X3, -16(CX)(R9*1)
 
 memmove_end_copy_match_emit_encodeBetterBlockAsm10B:
-	MOVQ BX, AX
+	MOVQ SI, CX
 	JMP  emit_literal_done_match_emit_encodeBetterBlockAsm10B
 
 memmove_long_match_emit_encodeBetterBlockAsm10B:
-	LEAQ (AX)(R8*1), BX
+	LEAQ (CX)(R9*1), SI
 
 	// genMemMoveLong
-	MOVOU (R9), X0
-	MOVOU 16(R9), X1
-	MOVOU -32(R9)(R8*1), X2
-	MOVOU -16(R9)(R8*1), X3
-	MOVQ  R8, R12
-	SHRQ  $0x05, R12
-	MOVQ  AX, R10
-	ANDL  $0x0000001f, R10
-	MOVQ  $0x00000040, R13
-	SUBQ  R10, R13
-	DECQ  R12
+	MOVOU (R10), X0
+	MOVOU 16(R10), X1
+	MOVOU -32(R10)(R9*1), X2
+	MOVOU -16(R10)(R9*1), X3
+	MOVQ  R9, R13
+	SHRQ  $0x05, R13
+	MOVQ  CX, R11
+	ANDL  $0x0000001f, R11
+	MOVQ  $0x00000040, R14
+	SUBQ  R11, R14
+	DECQ  R13
 	JA    emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_forward_sse_loop_32
-	LEAQ  -32(R9)(R13*1), R10
-	LEAQ  -32(AX)(R13*1), R14
+	LEAQ  -32(R10)(R14*1), R11
+	LEAQ  -32(CX)(R14*1), R15
 
 emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_big_loop_back:
-	MOVOU (R10), X4
-	MOVOU 16(R10), X5
-	MOVOA X4, (R14)
-	MOVOA X5, 16(R14)
+	MOVOU (R11), X4
+	MOVOU 16(R11), X5
+	MOVOA X4, (R15)
+	MOVOA X5, 16(R15)
+	ADDQ  $0x20, R15
+	ADDQ  $0x20, R11
 	ADDQ  $0x20, R14
-	ADDQ  $0x20, R10
-	ADDQ  $0x20, R13
-	DECQ  R12
+	DECQ  R13
 	JNA   emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_big_loop_back
 
 emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_forward_sse_loop_32:
-	MOVOU -32(R9)(R13*1), X4
-	MOVOU -16(R9)(R13*1), X5
-	MOVOA X4, -32(AX)(R13*1)
-	MOVOA X5, -16(AX)(R13*1)
-	ADDQ  $0x20, R13
-	CMPQ  R8, R13
+	MOVOU -32(R10)(R14*1), X4
+	MOVOU -16(R10)(R14*1), X5
+	MOVOA X4, -32(CX)(R14*1)
+	MOVOA X5, -16(CX)(R14*1)
+	ADDQ  $0x20, R14
+	CMPQ  R9, R14
 	JAE   emit_lit_memmove_long_match_emit_encodeBetterBlockAsm10Blarge_forward_sse_loop_32
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(R8*1)
-	MOVOU X3, -16(AX)(R8*1)
-	MOVQ  BX, AX
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R9*1)
+	MOVOU X3, -16(CX)(R9*1)
+	MOVQ  SI, CX
 
 emit_literal_done_match_emit_encodeBetterBlockAsm10B:
-	ADDL R11, CX
-	ADDL $0x04, R11
-	MOVL CX, 12(SP)
+	ADDL R12, DX
+	ADDL $0x04, R12
+	MOVL DX, 12(SP)
 
 	// emitCopy
-	CMPL R11, $0x40
+	CMPL R12, $0x40
 	JBE  two_byte_offset_short_match_nolit_encodeBetterBlockAsm10B
-	CMPL DI, $0x00000800
+	CMPL R8, $0x00000800
 	JAE  long_offset_short_match_nolit_encodeBetterBlockAsm10B
-	MOVL $0x00000001, BX
-	LEAL 16(BX), BX
-	MOVB DI, 1(AX)
-	SHRL $0x08, DI
-	SHLL $0x05, DI
-	ORL  DI, BX
-	MOVB BL, (AX)
-	ADDQ $0x02, AX
-	SUBL $0x08, R11
+	MOVL $0x00000001, SI
+	LEAL 16(SI), SI
+	MOVB R8, 1(CX)
+	SHRL $0x08, R8
+	SHLL $0x05, R8
+	ORL  R8, SI
+	MOVB SI, (CX)
+	ADDQ $0x02, CX
+	SUBL $0x08, R12
 
 	// emitRepeat
-	LEAL -4(R11), R11
+	LEAL -4(R12), R12
 	JMP  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b
-	MOVL R11, BX
-	LEAL -4(R11), R11
-	CMPL BX, $0x08
+	MOVL R12, SI
+	LEAL -4(R12), R12
+	CMPL SI, $0x08
 	JBE  repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b
-	CMPL BX, $0x0c
+	CMPL SI, $0x0c
 	JAE  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b
-	CMPL DI, $0x00000800
+	CMPL R8, $0x00000800
 	JB   repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b
 
 cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b:
-	CMPL R11, $0x00000104
+	CMPL R12, $0x00000104
 	JB   repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b
-	LEAL -256(R11), R11
-	MOVW $0x0019, (AX)
-	MOVW R11, 2(AX)
-	ADDQ $0x04, AX
+	LEAL -256(R12), R12
+	MOVW $0x0019, (CX)
+	MOVW R12, 2(CX)
+	ADDQ $0x04, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm10B
 
 repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b:
-	LEAL -4(R11), R11
-	MOVW $0x0015, (AX)
-	MOVB R11, 2(AX)
-	ADDQ $0x03, AX
+	LEAL -4(R12), R12
+	MOVW $0x0015, (CX)
+	MOVB R12, 2(CX)
+	ADDQ $0x03, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm10B
 
 repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b:
-	SHLL $0x02, R11
-	ORL  $0x01, R11
-	MOVW R11, (AX)
-	ADDQ $0x02, AX
+	SHLL $0x02, R12
+	ORL  $0x01, R12
+	MOVW R12, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm10B
 
 repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short_2b:
-	XORQ BX, BX
-	LEAL 1(BX)(R11*4), R11
-	MOVB DI, 1(AX)
-	SARL $0x08, DI
-	SHLL $0x05, DI
-	ORL  DI, R11
-	MOVB R11, (AX)
-	ADDQ $0x02, AX
+	XORQ SI, SI
+	LEAL 1(SI)(R12*4), R12
+	MOVB R8, 1(CX)
+	SARL $0x08, R8
+	SHLL $0x05, R8
+	ORL  R8, R12
+	MOVB R12, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm10B
 
 long_offset_short_match_nolit_encodeBetterBlockAsm10B:
-	MOVB $0xee, (AX)
-	MOVW DI, 1(AX)
-	LEAL -60(R11), R11
-	ADDQ $0x03, AX
+	MOVB $0xee, (CX)
+	MOVW R8, 1(CX)
+	LEAL -60(R12), R12
+	ADDQ $0x03, CX
 
 	// emitRepeat
-	MOVL R11, BX
-	LEAL -4(R11), R11
-	CMPL BX, $0x08
+	MOVL R12, SI
+	LEAL -4(R12), R12
+	CMPL SI, $0x08
 	JBE  repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short
-	CMPL BX, $0x0c
+	CMPL SI, $0x0c
 	JAE  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short
-	CMPL DI, $0x00000800
+	CMPL R8, $0x00000800
 	JB   repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short
 
 cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short:
-	CMPL R11, $0x00000104
+	CMPL R12, $0x00000104
 	JB   repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short
-	LEAL -256(R11), R11
-	MOVW $0x0019, (AX)
-	MOVW R11, 2(AX)
-	ADDQ $0x04, AX
+	LEAL -256(R12), R12
+	MOVW $0x0019, (CX)
+	MOVW R12, 2(CX)
+	ADDQ $0x04, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm10B
 
 repeat_three_match_nolit_encodeBetterBlockAsm10B_emit_copy_short:
-	LEAL -4(R11), R11
-	MOVW $0x0015, (AX)
-	MOVB R11, 2(AX)
-	ADDQ $0x03, AX
+	LEAL -4(R12), R12
+	MOVW $0x0015, (CX)
+	MOVB R12, 2(CX)
+	ADDQ $0x03, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm10B
 
 repeat_two_match_nolit_encodeBetterBlockAsm10B_emit_copy_short:
-	SHLL $0x02, R11
-	ORL  $0x01, R11
-	MOVW R11, (AX)
-	ADDQ $0x02, AX
+	SHLL $0x02, R12
+	ORL  $0x01, R12
+	MOVW R12, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm10B
 
 repeat_two_offset_match_nolit_encodeBetterBlockAsm10B_emit_copy_short:
-	XORQ BX, BX
-	LEAL 1(BX)(R11*4), R11
-	MOVB DI, 1(AX)
-	SARL $0x08, DI
-	SHLL $0x05, DI
-	ORL  DI, R11
-	MOVB R11, (AX)
-	ADDQ $0x02, AX
+	XORQ SI, SI
+	LEAL 1(SI)(R12*4), R12
+	MOVB R8, 1(CX)
+	SARL $0x08, R8
+	SHLL $0x05, R8
+	ORL  R8, R12
+	MOVB R12, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm10B
 
 two_byte_offset_short_match_nolit_encodeBetterBlockAsm10B:
-	MOVL R11, BX
-	SHLL $0x02, BX
-	CMPL R11, $0x0c
+	MOVL R12, SI
+	SHLL $0x02, SI
+	CMPL R12, $0x0c
 	JAE  emit_copy_three_match_nolit_encodeBetterBlockAsm10B
-	CMPL DI, $0x00000800
+	CMPL R8, $0x00000800
 	JAE  emit_copy_three_match_nolit_encodeBetterBlockAsm10B
-	LEAL -15(BX), BX
-	MOVB DI, 1(AX)
-	SHRL $0x08, DI
-	SHLL $0x05, DI
-	ORL  DI, BX
-	MOVB BL, (AX)
-	ADDQ $0x02, AX
+	LEAL -15(SI), SI
+	MOVB R8, 1(CX)
+	SHRL $0x08, R8
+	SHLL $0x05, R8
+	ORL  R8, SI
+	MOVB SI, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm10B
 
 emit_copy_three_match_nolit_encodeBetterBlockAsm10B:
-	LEAL -2(BX), BX
-	MOVB BL, (AX)
-	MOVW DI, 1(AX)
-	ADDQ $0x03, AX
+	LEAL -2(SI), SI
+	MOVB SI, (CX)
+	MOVW R8, 1(CX)
+	ADDQ $0x03, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm10B
 
 match_is_repeat_encodeBetterBlockAsm10B:
-	MOVL 12(SP), BX
-	CMPL BX, SI
+	MOVL 12(SP), SI
+	CMPL SI, DI
 	JEQ  emit_literal_done_match_emit_repeat_encodeBetterBlockAsm10B
-	MOVL SI, R8
-	MOVL SI, 12(SP)
-	LEAQ (DX)(BX*1), R9
-	SUBL BX, R8
-	LEAL -1(R8), BX
-	CMPL BX, $0x3c
+	MOVL DI, R9
+	MOVL DI, 12(SP)
+	LEAQ (BX)(SI*1), R10
+	SUBL SI, R9
+	LEAL -1(R9), SI
+	CMPL SI, $0x3c
 	JB   one_byte_match_emit_repeat_encodeBetterBlockAsm10B
-	CMPL BX, $0x00000100
+	CMPL SI, $0x00000100
 	JB   two_bytes_match_emit_repeat_encodeBetterBlockAsm10B
 	JB   three_bytes_match_emit_repeat_encodeBetterBlockAsm10B
 
 three_bytes_match_emit_repeat_encodeBetterBlockAsm10B:
-	MOVB $0xf4, (AX)
-	MOVW BX, 1(AX)
-	ADDQ $0x03, AX
+	MOVB $0xf4, (CX)
+	MOVW SI, 1(CX)
+	ADDQ $0x03, CX
 	JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm10B
 
 two_bytes_match_emit_repeat_encodeBetterBlockAsm10B:
-	MOVB $0xf0, (AX)
-	MOVB BL, 1(AX)
-	ADDQ $0x02, AX
-	CMPL BX, $0x40
+	MOVB $0xf0, (CX)
+	MOVB SI, 1(CX)
+	ADDQ $0x02, CX
+	CMPL SI, $0x40
 	JB   memmove_match_emit_repeat_encodeBetterBlockAsm10B
 	JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm10B
 
 one_byte_match_emit_repeat_encodeBetterBlockAsm10B:
-	SHLB $0x02, BL
-	MOVB BL, (AX)
-	ADDQ $0x01, AX
+	SHLB $0x02, SI
+	MOVB SI, (CX)
+	ADDQ $0x01, CX
 
 memmove_match_emit_repeat_encodeBetterBlockAsm10B:
-	LEAQ (AX)(R8*1), BX
+	LEAQ (CX)(R9*1), SI
 
 	// genMemMoveShort
-	CMPQ R8, $0x04
+	CMPQ R9, $0x04
 	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4
-	CMPQ R8, $0x08
+	CMPQ R9, $0x08
 	JB   emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4through7
-	CMPQ R8, $0x10
+	CMPQ R9, $0x10
 	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_8through16
-	CMPQ R8, $0x20
+	CMPQ R9, $0x20
 	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_17through32
 	JMP  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_33through64
 
 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4:
-	MOVL (R9), R10
-	MOVL R10, (AX)
+	MOVL (R10), R11
+	MOVL R11, (CX)
 	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B
 
 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_4through7:
-	MOVL (R9), R10
-	MOVL -4(R9)(R8*1), R9
-	MOVL R10, (AX)
-	MOVL R9, -4(AX)(R8*1)
+	MOVL (R10), R11
+	MOVL -4(R10)(R9*1), R10
+	MOVL R11, (CX)
+	MOVL R10, -4(CX)(R9*1)
 	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B
 
 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_8through16:
-	MOVQ (R9), R10
-	MOVQ -8(R9)(R8*1), R9
-	MOVQ R10, (AX)
-	MOVQ R9, -8(AX)(R8*1)
+	MOVQ (R10), R11
+	MOVQ -8(R10)(R9*1), R10
+	MOVQ R11, (CX)
+	MOVQ R10, -8(CX)(R9*1)
 	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B
 
 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_17through32:
-	MOVOU (R9), X0
-	MOVOU -16(R9)(R8*1), X1
-	MOVOU X0, (AX)
-	MOVOU X1, -16(AX)(R8*1)
+	MOVOU (R10), X0
+	MOVOU -16(R10)(R9*1), X1
+	MOVOU X0, (CX)
+	MOVOU X1, -16(CX)(R9*1)
 	JMP   memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B
 
 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm10B_memmove_move_33through64:
-	MOVOU (R9), X0
-	MOVOU 16(R9), X1
-	MOVOU -32(R9)(R8*1), X2
-	MOVOU -16(R9)(R8*1), X3
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(R8*1)
-	MOVOU X3, -16(AX)(R8*1)
+	MOVOU (R10), X0
+	MOVOU 16(R10), X1
+	MOVOU -32(R10)(R9*1), X2
+	MOVOU -16(R10)(R9*1), X3
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R9*1)
+	MOVOU X3, -16(CX)(R9*1)
 
 memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm10B:
-	MOVQ BX, AX
+	MOVQ SI, CX
 	JMP  emit_literal_done_match_emit_repeat_encodeBetterBlockAsm10B
 
 memmove_long_match_emit_repeat_encodeBetterBlockAsm10B:
-	LEAQ (AX)(R8*1), BX
+	LEAQ (CX)(R9*1), SI
 
 	// genMemMoveLong
-	MOVOU (R9), X0
-	MOVOU 16(R9), X1
-	MOVOU -32(R9)(R8*1), X2
-	MOVOU -16(R9)(R8*1), X3
-	MOVQ  R8, R12
-	SHRQ  $0x05, R12
-	MOVQ  AX, R10
-	ANDL  $0x0000001f, R10
-	MOVQ  $0x00000040, R13
-	SUBQ  R10, R13
-	DECQ  R12
+	MOVOU (R10), X0
+	MOVOU 16(R10), X1
+	MOVOU -32(R10)(R9*1), X2
+	MOVOU -16(R10)(R9*1), X3
+	MOVQ  R9, R13
+	SHRQ  $0x05, R13
+	MOVQ  CX, R11
+	ANDL  $0x0000001f, R11
+	MOVQ  $0x00000040, R14
+	SUBQ  R11, R14
+	DECQ  R13
 	JA    emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_forward_sse_loop_32
-	LEAQ  -32(R9)(R13*1), R10
-	LEAQ  -32(AX)(R13*1), R14
+	LEAQ  -32(R10)(R14*1), R11
+	LEAQ  -32(CX)(R14*1), R15
 
 emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_big_loop_back:
-	MOVOU (R10), X4
-	MOVOU 16(R10), X5
-	MOVOA X4, (R14)
-	MOVOA X5, 16(R14)
+	MOVOU (R11), X4
+	MOVOU 16(R11), X5
+	MOVOA X4, (R15)
+	MOVOA X5, 16(R15)
+	ADDQ  $0x20, R15
+	ADDQ  $0x20, R11
 	ADDQ  $0x20, R14
-	ADDQ  $0x20, R10
-	ADDQ  $0x20, R13
-	DECQ  R12
+	DECQ  R13
 	JNA   emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_big_loop_back
 
 emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_forward_sse_loop_32:
-	MOVOU -32(R9)(R13*1), X4
-	MOVOU -16(R9)(R13*1), X5
-	MOVOA X4, -32(AX)(R13*1)
-	MOVOA X5, -16(AX)(R13*1)
-	ADDQ  $0x20, R13
-	CMPQ  R8, R13
+	MOVOU -32(R10)(R14*1), X4
+	MOVOU -16(R10)(R14*1), X5
+	MOVOA X4, -32(CX)(R14*1)
+	MOVOA X5, -16(CX)(R14*1)
+	ADDQ  $0x20, R14
+	CMPQ  R9, R14
 	JAE   emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm10Blarge_forward_sse_loop_32
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(R8*1)
-	MOVOU X3, -16(AX)(R8*1)
-	MOVQ  BX, AX
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R9*1)
+	MOVOU X3, -16(CX)(R9*1)
+	MOVQ  SI, CX
 
 emit_literal_done_match_emit_repeat_encodeBetterBlockAsm10B:
-	ADDL R11, CX
-	ADDL $0x04, R11
-	MOVL CX, 12(SP)
+	ADDL R12, DX
+	ADDL $0x04, R12
+	MOVL DX, 12(SP)
 
 	// emitRepeat
-	MOVL R11, BX
-	LEAL -4(R11), R11
-	CMPL BX, $0x08
+	MOVL R12, SI
+	LEAL -4(R12), R12
+	CMPL SI, $0x08
 	JBE  repeat_two_match_nolit_repeat_encodeBetterBlockAsm10B
-	CMPL BX, $0x0c
+	CMPL SI, $0x0c
 	JAE  cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B
-	CMPL DI, $0x00000800
+	CMPL R8, $0x00000800
 	JB   repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B
 
 cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B:
-	CMPL R11, $0x00000104
+	CMPL R12, $0x00000104
 	JB   repeat_three_match_nolit_repeat_encodeBetterBlockAsm10B
-	LEAL -256(R11), R11
-	MOVW $0x0019, (AX)
-	MOVW R11, 2(AX)
-	ADDQ $0x04, AX
+	LEAL -256(R12), R12
+	MOVW $0x0019, (CX)
+	MOVW R12, 2(CX)
+	ADDQ $0x04, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm10B
 
 repeat_three_match_nolit_repeat_encodeBetterBlockAsm10B:
-	LEAL -4(R11), R11
-	MOVW $0x0015, (AX)
-	MOVB R11, 2(AX)
-	ADDQ $0x03, AX
+	LEAL -4(R12), R12
+	MOVW $0x0015, (CX)
+	MOVB R12, 2(CX)
+	ADDQ $0x03, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm10B
 
 repeat_two_match_nolit_repeat_encodeBetterBlockAsm10B:
-	SHLL $0x02, R11
-	ORL  $0x01, R11
-	MOVW R11, (AX)
-	ADDQ $0x02, AX
+	SHLL $0x02, R12
+	ORL  $0x01, R12
+	MOVW R12, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm10B
 
 repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm10B:
-	XORQ BX, BX
-	LEAL 1(BX)(R11*4), R11
-	MOVB DI, 1(AX)
-	SARL $0x08, DI
-	SHLL $0x05, DI
-	ORL  DI, R11
-	MOVB R11, (AX)
-	ADDQ $0x02, AX
+	XORQ SI, SI
+	LEAL 1(SI)(R12*4), R12
+	MOVB R8, 1(CX)
+	SARL $0x08, R8
+	SHLL $0x05, R8
+	ORL  R8, R12
+	MOVB R12, (CX)
+	ADDQ $0x02, CX
 
 match_nolit_emitcopy_end_encodeBetterBlockAsm10B:
-	CMPL CX, 8(SP)
+	CMPL DX, 8(SP)
 	JAE  emit_remainder_encodeBetterBlockAsm10B
-	CMPQ AX, (SP)
+	CMPQ CX, (SP)
 	JB   match_nolit_dst_ok_encodeBetterBlockAsm10B
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 match_nolit_dst_ok_encodeBetterBlockAsm10B:
-	MOVQ  $0x0000cf1bbcdcbf9b, BX
-	MOVQ  $0x9e3779b1, DI
-	LEAQ  1(SI), SI
-	LEAQ  -2(CX), R8
-	MOVQ  (DX)(SI*1), R9
-	MOVQ  1(DX)(SI*1), R10
-	MOVQ  (DX)(R8*1), R11
-	MOVQ  1(DX)(R8*1), R12
-	SHLQ  $0x10, R9
-	IMULQ BX, R9
-	SHRQ  $0x34, R9
-	SHLQ  $0x20, R10
-	IMULQ DI, R10
-	SHRQ  $0x36, R10
-	SHLQ  $0x10, R11
-	IMULQ BX, R11
-	SHRQ  $0x34, R11
-	SHLQ  $0x20, R12
-	IMULQ DI, R12
-	SHRQ  $0x36, R12
-	LEAQ  1(SI), DI
-	LEAQ  1(R8), R13
-	MOVL  SI, 24(SP)(R9*4)
-	MOVL  R8, 24(SP)(R11*4)
-	MOVL  DI, 16408(SP)(R10*4)
-	MOVL  R13, 16408(SP)(R12*4)
-	LEAQ  1(R8)(SI*1), DI
-	SHRQ  $0x01, DI
-	ADDQ  $0x01, SI
-	SUBQ  $0x01, R8
+	MOVQ  $0x0000cf1bbcdcbf9b, SI
+	MOVQ  $0x9e3779b1, R8
+	LEAQ  1(DI), DI
+	LEAQ  -2(DX), R9
+	MOVQ  (BX)(DI*1), R10
+	MOVQ  1(BX)(DI*1), R11
+	MOVQ  (BX)(R9*1), R12
+	MOVQ  1(BX)(R9*1), R13
+	SHLQ  $0x10, R10
+	IMULQ SI, R10
+	SHRQ  $0x34, R10
+	SHLQ  $0x20, R11
+	IMULQ R8, R11
+	SHRQ  $0x36, R11
+	SHLQ  $0x10, R12
+	IMULQ SI, R12
+	SHRQ  $0x34, R12
+	SHLQ  $0x20, R13
+	IMULQ R8, R13
+	SHRQ  $0x36, R13
+	LEAQ  1(DI), R8
+	LEAQ  1(R9), R14
+	MOVL  DI, (AX)(R10*4)
+	MOVL  R9, (AX)(R12*4)
+	MOVL  R8, 16384(AX)(R11*4)
+	MOVL  R14, 16384(AX)(R13*4)
+	LEAQ  1(R9)(DI*1), R8
+	SHRQ  $0x01, R8
+	ADDQ  $0x01, DI
+	SUBQ  $0x01, R9
 
 index_loop_encodeBetterBlockAsm10B:
-	CMPQ  DI, R8
+	CMPQ  R8, R9
 	JAE   search_loop_encodeBetterBlockAsm10B
-	MOVQ  (DX)(SI*1), R9
-	MOVQ  (DX)(DI*1), R10
-	SHLQ  $0x10, R9
-	IMULQ BX, R9
-	SHRQ  $0x34, R9
+	MOVQ  (BX)(DI*1), R10
+	MOVQ  (BX)(R8*1), R11
 	SHLQ  $0x10, R10
-	IMULQ BX, R10
+	IMULQ SI, R10
 	SHRQ  $0x34, R10
-	MOVL  SI, 24(SP)(R9*4)
-	MOVL  DI, 24(SP)(R10*4)
-	ADDQ  $0x02, SI
+	SHLQ  $0x10, R11
+	IMULQ SI, R11
+	SHRQ  $0x34, R11
+	MOVL  DI, (AX)(R10*4)
+	MOVL  R8, (AX)(R11*4)
 	ADDQ  $0x02, DI
+	ADDQ  $0x02, R8
 	JMP   index_loop_encodeBetterBlockAsm10B
 
 emit_remainder_encodeBetterBlockAsm10B:
-	MOVQ src_len+32(FP), CX
-	SUBL 12(SP), CX
-	LEAQ 3(AX)(CX*1), CX
-	CMPQ CX, (SP)
+	MOVQ src_len+32(FP), AX
+	SUBL 12(SP), AX
+	LEAQ 3(CX)(AX*1), AX
+	CMPQ AX, (SP)
 	JB   emit_remainder_ok_encodeBetterBlockAsm10B
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 emit_remainder_ok_encodeBetterBlockAsm10B:
-	MOVQ src_len+32(FP), CX
-	MOVL 12(SP), BX
-	CMPL BX, CX
+	MOVQ src_len+32(FP), AX
+	MOVL 12(SP), DX
+	CMPL DX, AX
 	JEQ  emit_literal_done_emit_remainder_encodeBetterBlockAsm10B
-	MOVL CX, SI
-	MOVL CX, 12(SP)
-	LEAQ (DX)(BX*1), CX
-	SUBL BX, SI
+	MOVL AX, SI
+	MOVL AX, 12(SP)
+	LEAQ (BX)(DX*1), AX
+	SUBL DX, SI
 	LEAL -1(SI), DX
 	CMPL DX, $0x3c
 	JB   one_byte_emit_remainder_encodeBetterBlockAsm10B
@@ -9721,26 +9730,26 @@ emit_remainder_ok_encodeBetterBlockAsm10B:
 	JB   three_bytes_emit_remainder_encodeBetterBlockAsm10B
 
 three_bytes_emit_remainder_encodeBetterBlockAsm10B:
-	MOVB $0xf4, (AX)
-	MOVW DX, 1(AX)
-	ADDQ $0x03, AX
+	MOVB $0xf4, (CX)
+	MOVW DX, 1(CX)
+	ADDQ $0x03, CX
 	JMP  memmove_long_emit_remainder_encodeBetterBlockAsm10B
 
 two_bytes_emit_remainder_encodeBetterBlockAsm10B:
-	MOVB $0xf0, (AX)
-	MOVB DL, 1(AX)
-	ADDQ $0x02, AX
+	MOVB $0xf0, (CX)
+	MOVB DL, 1(CX)
+	ADDQ $0x02, CX
 	CMPL DX, $0x40
 	JB   memmove_emit_remainder_encodeBetterBlockAsm10B
 	JMP  memmove_long_emit_remainder_encodeBetterBlockAsm10B
 
 one_byte_emit_remainder_encodeBetterBlockAsm10B:
 	SHLB $0x02, DL
-	MOVB DL, (AX)
-	ADDQ $0x01, AX
+	MOVB DL, (CX)
+	ADDQ $0x01, CX
 
 memmove_emit_remainder_encodeBetterBlockAsm10B:
-	LEAQ (AX)(SI*1), DX
+	LEAQ (CX)(SI*1), DX
 	MOVL SI, BX
 
 	// genMemMoveShort
@@ -9756,73 +9765,73 @@ memmove_emit_remainder_encodeBetterBlockAsm10B:
 	JMP  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_33through64
 
 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_1or2:
-	MOVB (CX), SI
-	MOVB -1(CX)(BX*1), CL
-	MOVB SI, (AX)
-	MOVB CL, -1(AX)(BX*1)
+	MOVB (AX), SI
+	MOVB -1(AX)(BX*1), AL
+	MOVB SI, (CX)
+	MOVB AL, -1(CX)(BX*1)
 	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B
 
 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_3:
-	MOVW (CX), SI
-	MOVB 2(CX), CL
-	MOVW SI, (AX)
-	MOVB CL, 2(AX)
+	MOVW (AX), SI
+	MOVB 2(AX), AL
+	MOVW SI, (CX)
+	MOVB AL, 2(CX)
 	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B
 
 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_4through7:
-	MOVL (CX), SI
-	MOVL -4(CX)(BX*1), CX
-	MOVL SI, (AX)
-	MOVL CX, -4(AX)(BX*1)
+	MOVL (AX), SI
+	MOVL -4(AX)(BX*1), AX
+	MOVL SI, (CX)
+	MOVL AX, -4(CX)(BX*1)
 	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B
 
 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_8through16:
-	MOVQ (CX), SI
-	MOVQ -8(CX)(BX*1), CX
-	MOVQ SI, (AX)
-	MOVQ CX, -8(AX)(BX*1)
+	MOVQ (AX), SI
+	MOVQ -8(AX)(BX*1), AX
+	MOVQ SI, (CX)
+	MOVQ AX, -8(CX)(BX*1)
 	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B
 
 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_17through32:
-	MOVOU (CX), X0
-	MOVOU -16(CX)(BX*1), X1
-	MOVOU X0, (AX)
-	MOVOU X1, -16(AX)(BX*1)
+	MOVOU (AX), X0
+	MOVOU -16(AX)(BX*1), X1
+	MOVOU X0, (CX)
+	MOVOU X1, -16(CX)(BX*1)
 	JMP   memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B
 
 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm10B_memmove_move_33through64:
-	MOVOU (CX), X0
-	MOVOU 16(CX), X1
-	MOVOU -32(CX)(BX*1), X2
-	MOVOU -16(CX)(BX*1), X3
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(BX*1)
-	MOVOU X3, -16(AX)(BX*1)
+	MOVOU (AX), X0
+	MOVOU 16(AX), X1
+	MOVOU -32(AX)(BX*1), X2
+	MOVOU -16(AX)(BX*1), X3
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(BX*1)
+	MOVOU X3, -16(CX)(BX*1)
 
 memmove_end_copy_emit_remainder_encodeBetterBlockAsm10B:
-	MOVQ DX, AX
+	MOVQ DX, CX
 	JMP  emit_literal_done_emit_remainder_encodeBetterBlockAsm10B
 
 memmove_long_emit_remainder_encodeBetterBlockAsm10B:
-	LEAQ (AX)(SI*1), DX
+	LEAQ (CX)(SI*1), DX
 	MOVL SI, BX
 
 	// genMemMoveLong
-	MOVOU (CX), X0
-	MOVOU 16(CX), X1
-	MOVOU -32(CX)(BX*1), X2
-	MOVOU -16(CX)(BX*1), X3
+	MOVOU (AX), X0
+	MOVOU 16(AX), X1
+	MOVOU -32(AX)(BX*1), X2
+	MOVOU -16(AX)(BX*1), X3
 	MOVQ  BX, DI
 	SHRQ  $0x05, DI
-	MOVQ  AX, SI
+	MOVQ  CX, SI
 	ANDL  $0x0000001f, SI
 	MOVQ  $0x00000040, R8
 	SUBQ  SI, R8
 	DECQ  DI
 	JA    emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_forward_sse_loop_32
-	LEAQ  -32(CX)(R8*1), SI
-	LEAQ  -32(AX)(R8*1), R9
+	LEAQ  -32(AX)(R8*1), SI
+	LEAQ  -32(CX)(R8*1), R9
 
 emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_big_loop_back:
 	MOVOU (SI), X4
@@ -9836,742 +9845,743 @@ emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_big_loop_back:
 	JNA   emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_big_loop_back
 
 emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_forward_sse_loop_32:
-	MOVOU -32(CX)(R8*1), X4
-	MOVOU -16(CX)(R8*1), X5
-	MOVOA X4, -32(AX)(R8*1)
-	MOVOA X5, -16(AX)(R8*1)
+	MOVOU -32(AX)(R8*1), X4
+	MOVOU -16(AX)(R8*1), X5
+	MOVOA X4, -32(CX)(R8*1)
+	MOVOA X5, -16(CX)(R8*1)
 	ADDQ  $0x20, R8
 	CMPQ  BX, R8
 	JAE   emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm10Blarge_forward_sse_loop_32
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(BX*1)
-	MOVOU X3, -16(AX)(BX*1)
-	MOVQ  DX, AX
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(BX*1)
+	MOVOU X3, -16(CX)(BX*1)
+	MOVQ  DX, CX
 
 emit_literal_done_emit_remainder_encodeBetterBlockAsm10B:
-	MOVQ dst_base+0(FP), CX
-	SUBQ CX, AX
-	MOVQ AX, ret+48(FP)
+	MOVQ dst_base+0(FP), AX
+	SUBQ AX, CX
+	MOVQ CX, ret+56(FP)
 	RET
 
-// func encodeBetterBlockAsm8B(dst []byte, src []byte) int
+// func encodeBetterBlockAsm8B(dst []byte, src []byte, tmp *[5120]byte) int
 // Requires: BMI, SSE2
-TEXT ·encodeBetterBlockAsm8B(SB), $5144-56
-	MOVQ dst_base+0(FP), AX
-	MOVQ $0x00000028, CX
-	LEAQ 24(SP), DX
+TEXT ·encodeBetterBlockAsm8B(SB), $24-64
+	MOVQ tmp+48(FP), AX
+	MOVQ dst_base+0(FP), CX
+	MOVQ $0x00000028, DX
+	MOVQ AX, BX
 	PXOR X0, X0
 
 zero_loop_encodeBetterBlockAsm8B:
-	MOVOU X0, (DX)
-	MOVOU X0, 16(DX)
-	MOVOU X0, 32(DX)
-	MOVOU X0, 48(DX)
-	MOVOU X0, 64(DX)
-	MOVOU X0, 80(DX)
-	MOVOU X0, 96(DX)
-	MOVOU X0, 112(DX)
-	ADDQ  $0x80, DX
-	DECQ  CX
+	MOVOU X0, (BX)
+	MOVOU X0, 16(BX)
+	MOVOU X0, 32(BX)
+	MOVOU X0, 48(BX)
+	MOVOU X0, 64(BX)
+	MOVOU X0, 80(BX)
+	MOVOU X0, 96(BX)
+	MOVOU X0, 112(BX)
+	ADDQ  $0x80, BX
+	DECQ  DX
 	JNZ   zero_loop_encodeBetterBlockAsm8B
 	MOVL  $0x00000000, 12(SP)
-	MOVQ  src_len+32(FP), CX
-	LEAQ  -6(CX), DX
-	LEAQ  -8(CX), BX
-	MOVL  BX, 8(SP)
-	SHRQ  $0x05, CX
-	SUBL  CX, DX
-	LEAQ  (AX)(DX*1), DX
-	MOVQ  DX, (SP)
-	MOVL  $0x00000001, CX
+	MOVQ  src_len+32(FP), DX
+	LEAQ  -6(DX), BX
+	LEAQ  -8(DX), SI
+	MOVL  SI, 8(SP)
+	SHRQ  $0x05, DX
+	SUBL  DX, BX
+	LEAQ  (CX)(BX*1), BX
+	MOVQ  BX, (SP)
+	MOVL  $0x00000001, DX
 	MOVL  $0x00000000, 16(SP)
-	MOVQ  src_base+24(FP), DX
+	MOVQ  src_base+24(FP), BX
 
 search_loop_encodeBetterBlockAsm8B:
-	MOVL  CX, BX
-	SUBL  12(SP), BX
-	SHRL  $0x04, BX
-	LEAL  1(CX)(BX*1), BX
-	CMPL  BX, 8(SP)
+	MOVL  DX, SI
+	SUBL  12(SP), SI
+	SHRL  $0x04, SI
+	LEAL  1(DX)(SI*1), SI
+	CMPL  SI, 8(SP)
 	JAE   emit_remainder_encodeBetterBlockAsm8B
-	MOVQ  (DX)(CX*1), SI
-	MOVL  BX, 20(SP)
-	MOVQ  $0x0000cf1bbcdcbf9b, R8
-	MOVQ  $0x9e3779b1, BX
-	MOVQ  SI, R9
-	MOVQ  SI, R10
-	SHLQ  $0x10, R9
-	IMULQ R8, R9
-	SHRQ  $0x36, R9
-	SHLQ  $0x20, R10
-	IMULQ BX, R10
-	SHRQ  $0x38, R10
-	MOVL  24(SP)(R9*4), BX
-	MOVL  4120(SP)(R10*4), DI
-	MOVL  CX, 24(SP)(R9*4)
-	MOVL  CX, 4120(SP)(R10*4)
-	MOVQ  (DX)(BX*1), R9
-	MOVQ  (DX)(DI*1), R10
-	CMPQ  R9, SI
+	MOVQ  (BX)(DX*1), DI
+	MOVL  SI, 20(SP)
+	MOVQ  $0x0000cf1bbcdcbf9b, R9
+	MOVQ  $0x9e3779b1, SI
+	MOVQ  DI, R10
+	MOVQ  DI, R11
+	SHLQ  $0x10, R10
+	IMULQ R9, R10
+	SHRQ  $0x36, R10
+	SHLQ  $0x20, R11
+	IMULQ SI, R11
+	SHRQ  $0x38, R11
+	MOVL  (AX)(R10*4), SI
+	MOVL  4096(AX)(R11*4), R8
+	MOVL  DX, (AX)(R10*4)
+	MOVL  DX, 4096(AX)(R11*4)
+	MOVQ  (BX)(SI*1), R10
+	MOVQ  (BX)(R8*1), R11
+	CMPQ  R10, DI
 	JEQ   candidate_match_encodeBetterBlockAsm8B
-	CMPQ  R10, SI
+	CMPQ  R11, DI
 	JNE   no_short_found_encodeBetterBlockAsm8B
-	MOVL  DI, BX
+	MOVL  R8, SI
 	JMP   candidate_match_encodeBetterBlockAsm8B
 
 no_short_found_encodeBetterBlockAsm8B:
-	CMPL R9, SI
+	CMPL R10, DI
 	JEQ  candidate_match_encodeBetterBlockAsm8B
-	CMPL R10, SI
+	CMPL R11, DI
 	JEQ  candidateS_match_encodeBetterBlockAsm8B
-	MOVL 20(SP), CX
+	MOVL 20(SP), DX
 	JMP  search_loop_encodeBetterBlockAsm8B
 
 candidateS_match_encodeBetterBlockAsm8B:
-	SHRQ  $0x08, SI
-	MOVQ  SI, R9
-	SHLQ  $0x10, R9
-	IMULQ R8, R9
-	SHRQ  $0x36, R9
-	MOVL  24(SP)(R9*4), BX
-	INCL  CX
-	MOVL  CX, 24(SP)(R9*4)
-	CMPL  (DX)(BX*1), SI
+	SHRQ  $0x08, DI
+	MOVQ  DI, R10
+	SHLQ  $0x10, R10
+	IMULQ R9, R10
+	SHRQ  $0x36, R10
+	MOVL  (AX)(R10*4), SI
+	INCL  DX
+	MOVL  DX, (AX)(R10*4)
+	CMPL  (BX)(SI*1), DI
 	JEQ   candidate_match_encodeBetterBlockAsm8B
-	DECL  CX
-	MOVL  DI, BX
+	DECL  DX
+	MOVL  R8, SI
 
 candidate_match_encodeBetterBlockAsm8B:
-	MOVL  12(SP), SI
-	TESTL BX, BX
+	MOVL  12(SP), DI
+	TESTL SI, SI
 	JZ    match_extend_back_end_encodeBetterBlockAsm8B
 
 match_extend_back_loop_encodeBetterBlockAsm8B:
-	CMPL CX, SI
+	CMPL DX, DI
 	JBE  match_extend_back_end_encodeBetterBlockAsm8B
-	MOVB -1(DX)(BX*1), DI
-	MOVB -1(DX)(CX*1), R8
-	CMPB DI, R8
+	MOVB -1(BX)(SI*1), R8
+	MOVB -1(BX)(DX*1), R9
+	CMPB R8, R9
 	JNE  match_extend_back_end_encodeBetterBlockAsm8B
-	LEAL -1(CX), CX
-	DECL BX
+	LEAL -1(DX), DX
+	DECL SI
 	JZ   match_extend_back_end_encodeBetterBlockAsm8B
 	JMP  match_extend_back_loop_encodeBetterBlockAsm8B
 
 match_extend_back_end_encodeBetterBlockAsm8B:
-	MOVL CX, SI
-	SUBL 12(SP), SI
-	LEAQ 3(AX)(SI*1), SI
-	CMPQ SI, (SP)
+	MOVL DX, DI
+	SUBL 12(SP), DI
+	LEAQ 3(CX)(DI*1), DI
+	CMPQ DI, (SP)
 	JB   match_dst_size_check_encodeBetterBlockAsm8B
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 match_dst_size_check_encodeBetterBlockAsm8B:
-	MOVL CX, SI
-	ADDL $0x04, CX
-	ADDL $0x04, BX
-	MOVQ src_len+32(FP), DI
-	SUBL CX, DI
-	LEAQ (DX)(CX*1), R8
-	LEAQ (DX)(BX*1), R9
+	MOVL DX, DI
+	ADDL $0x04, DX
+	ADDL $0x04, SI
+	MOVQ src_len+32(FP), R8
+	SUBL DX, R8
+	LEAQ (BX)(DX*1), R9
+	LEAQ (BX)(SI*1), R10
 
 	// matchLen
-	XORL R11, R11
+	XORL R12, R12
 
 matchlen_loopback_16_match_nolit_encodeBetterBlockAsm8B:
-	CMPL DI, $0x10
+	CMPL R8, $0x10
 	JB   matchlen_match8_match_nolit_encodeBetterBlockAsm8B
-	MOVQ (R8)(R11*1), R10
-	MOVQ 8(R8)(R11*1), R12
-	XORQ (R9)(R11*1), R10
+	MOVQ (R9)(R12*1), R11
+	MOVQ 8(R9)(R12*1), R13
+	XORQ (R10)(R12*1), R11
 	JNZ  matchlen_bsf_8_match_nolit_encodeBetterBlockAsm8B
-	XORQ 8(R9)(R11*1), R12
+	XORQ 8(R10)(R12*1), R13
 	JNZ  matchlen_bsf_16match_nolit_encodeBetterBlockAsm8B
-	LEAL -16(DI), DI
-	LEAL 16(R11), R11
+	LEAL -16(R8), R8
+	LEAL 16(R12), R12
 	JMP  matchlen_loopback_16_match_nolit_encodeBetterBlockAsm8B
 
 matchlen_bsf_16match_nolit_encodeBetterBlockAsm8B:
 #ifdef GOAMD64_v3
-	TZCNTQ R12, R12
+	TZCNTQ R13, R13
 
 #else
-	BSFQ R12, R12
+	BSFQ R13, R13
 
 #endif
-	SARQ $0x03, R12
-	LEAL 8(R11)(R12*1), R11
+	SARQ $0x03, R13
+	LEAL 8(R12)(R13*1), R12
 	JMP  match_nolit_end_encodeBetterBlockAsm8B
 
 matchlen_match8_match_nolit_encodeBetterBlockAsm8B:
-	CMPL DI, $0x08
+	CMPL R8, $0x08
 	JB   matchlen_match4_match_nolit_encodeBetterBlockAsm8B
-	MOVQ (R8)(R11*1), R10
-	XORQ (R9)(R11*1), R10
+	MOVQ (R9)(R12*1), R11
+	XORQ (R10)(R12*1), R11
 	JNZ  matchlen_bsf_8_match_nolit_encodeBetterBlockAsm8B
-	LEAL -8(DI), DI
-	LEAL 8(R11), R11
+	LEAL -8(R8), R8
+	LEAL 8(R12), R12
 	JMP  matchlen_match4_match_nolit_encodeBetterBlockAsm8B
 
 matchlen_bsf_8_match_nolit_encodeBetterBlockAsm8B:
 #ifdef GOAMD64_v3
-	TZCNTQ R10, R10
+	TZCNTQ R11, R11
 
 #else
-	BSFQ R10, R10
+	BSFQ R11, R11
 
 #endif
-	SARQ $0x03, R10
-	LEAL (R11)(R10*1), R11
+	SARQ $0x03, R11
+	LEAL (R12)(R11*1), R12
 	JMP  match_nolit_end_encodeBetterBlockAsm8B
 
 matchlen_match4_match_nolit_encodeBetterBlockAsm8B:
-	CMPL DI, $0x04
+	CMPL R8, $0x04
 	JB   matchlen_match2_match_nolit_encodeBetterBlockAsm8B
-	MOVL (R8)(R11*1), R10
-	CMPL (R9)(R11*1), R10
+	MOVL (R9)(R12*1), R11
+	CMPL (R10)(R12*1), R11
 	JNE  matchlen_match2_match_nolit_encodeBetterBlockAsm8B
-	LEAL -4(DI), DI
-	LEAL 4(R11), R11
+	LEAL -4(R8), R8
+	LEAL 4(R12), R12
 
 matchlen_match2_match_nolit_encodeBetterBlockAsm8B:
-	CMPL DI, $0x01
+	CMPL R8, $0x01
 	JE   matchlen_match1_match_nolit_encodeBetterBlockAsm8B
 	JB   match_nolit_end_encodeBetterBlockAsm8B
-	MOVW (R8)(R11*1), R10
-	CMPW (R9)(R11*1), R10
+	MOVW (R9)(R12*1), R11
+	CMPW (R10)(R12*1), R11
 	JNE  matchlen_match1_match_nolit_encodeBetterBlockAsm8B
-	LEAL 2(R11), R11
-	SUBL $0x02, DI
+	LEAL 2(R12), R12
+	SUBL $0x02, R8
 	JZ   match_nolit_end_encodeBetterBlockAsm8B
 
 matchlen_match1_match_nolit_encodeBetterBlockAsm8B:
-	MOVB (R8)(R11*1), R10
-	CMPB (R9)(R11*1), R10
+	MOVB (R9)(R12*1), R11
+	CMPB (R10)(R12*1), R11
 	JNE  match_nolit_end_encodeBetterBlockAsm8B
-	LEAL 1(R11), R11
+	LEAL 1(R12), R12
 
 match_nolit_end_encodeBetterBlockAsm8B:
-	MOVL CX, DI
-	SUBL BX, DI
+	MOVL DX, R8
+	SUBL SI, R8
 
 	// Check if repeat
-	CMPL 16(SP), DI
+	CMPL 16(SP), R8
 	JEQ  match_is_repeat_encodeBetterBlockAsm8B
-	MOVL DI, 16(SP)
-	MOVL 12(SP), BX
-	CMPL BX, SI
+	MOVL R8, 16(SP)
+	MOVL 12(SP), SI
+	CMPL SI, DI
 	JEQ  emit_literal_done_match_emit_encodeBetterBlockAsm8B
-	MOVL SI, R8
-	MOVL SI, 12(SP)
-	LEAQ (DX)(BX*1), R9
-	SUBL BX, R8
-	LEAL -1(R8), BX
-	CMPL BX, $0x3c
+	MOVL DI, R9
+	MOVL DI, 12(SP)
+	LEAQ (BX)(SI*1), R10
+	SUBL SI, R9
+	LEAL -1(R9), SI
+	CMPL SI, $0x3c
 	JB   one_byte_match_emit_encodeBetterBlockAsm8B
-	CMPL BX, $0x00000100
+	CMPL SI, $0x00000100
 	JB   two_bytes_match_emit_encodeBetterBlockAsm8B
 	JB   three_bytes_match_emit_encodeBetterBlockAsm8B
 
 three_bytes_match_emit_encodeBetterBlockAsm8B:
-	MOVB $0xf4, (AX)
-	MOVW BX, 1(AX)
-	ADDQ $0x03, AX
+	MOVB $0xf4, (CX)
+	MOVW SI, 1(CX)
+	ADDQ $0x03, CX
 	JMP  memmove_long_match_emit_encodeBetterBlockAsm8B
 
 two_bytes_match_emit_encodeBetterBlockAsm8B:
-	MOVB $0xf0, (AX)
-	MOVB BL, 1(AX)
-	ADDQ $0x02, AX
-	CMPL BX, $0x40
+	MOVB $0xf0, (CX)
+	MOVB SI, 1(CX)
+	ADDQ $0x02, CX
+	CMPL SI, $0x40
 	JB   memmove_match_emit_encodeBetterBlockAsm8B
 	JMP  memmove_long_match_emit_encodeBetterBlockAsm8B
 
 one_byte_match_emit_encodeBetterBlockAsm8B:
-	SHLB $0x02, BL
-	MOVB BL, (AX)
-	ADDQ $0x01, AX
+	SHLB $0x02, SI
+	MOVB SI, (CX)
+	ADDQ $0x01, CX
 
 memmove_match_emit_encodeBetterBlockAsm8B:
-	LEAQ (AX)(R8*1), BX
+	LEAQ (CX)(R9*1), SI
 
 	// genMemMoveShort
-	CMPQ R8, $0x04
+	CMPQ R9, $0x04
 	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4
-	CMPQ R8, $0x08
+	CMPQ R9, $0x08
 	JB   emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4through7
-	CMPQ R8, $0x10
+	CMPQ R9, $0x10
 	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_8through16
-	CMPQ R8, $0x20
+	CMPQ R9, $0x20
 	JBE  emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_17through32
 	JMP  emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_33through64
 
 emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4:
-	MOVL (R9), R10
-	MOVL R10, (AX)
+	MOVL (R10), R11
+	MOVL R11, (CX)
 	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm8B
 
 emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_4through7:
-	MOVL (R9), R10
-	MOVL -4(R9)(R8*1), R9
-	MOVL R10, (AX)
-	MOVL R9, -4(AX)(R8*1)
+	MOVL (R10), R11
+	MOVL -4(R10)(R9*1), R10
+	MOVL R11, (CX)
+	MOVL R10, -4(CX)(R9*1)
 	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm8B
 
 emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_8through16:
-	MOVQ (R9), R10
-	MOVQ -8(R9)(R8*1), R9
-	MOVQ R10, (AX)
-	MOVQ R9, -8(AX)(R8*1)
+	MOVQ (R10), R11
+	MOVQ -8(R10)(R9*1), R10
+	MOVQ R11, (CX)
+	MOVQ R10, -8(CX)(R9*1)
 	JMP  memmove_end_copy_match_emit_encodeBetterBlockAsm8B
 
 emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_17through32:
-	MOVOU (R9), X0
-	MOVOU -16(R9)(R8*1), X1
-	MOVOU X0, (AX)
-	MOVOU X1, -16(AX)(R8*1)
+	MOVOU (R10), X0
+	MOVOU -16(R10)(R9*1), X1
+	MOVOU X0, (CX)
+	MOVOU X1, -16(CX)(R9*1)
 	JMP   memmove_end_copy_match_emit_encodeBetterBlockAsm8B
 
 emit_lit_memmove_match_emit_encodeBetterBlockAsm8B_memmove_move_33through64:
-	MOVOU (R9), X0
-	MOVOU 16(R9), X1
-	MOVOU -32(R9)(R8*1), X2
-	MOVOU -16(R9)(R8*1), X3
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(R8*1)
-	MOVOU X3, -16(AX)(R8*1)
+	MOVOU (R10), X0
+	MOVOU 16(R10), X1
+	MOVOU -32(R10)(R9*1), X2
+	MOVOU -16(R10)(R9*1), X3
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R9*1)
+	MOVOU X3, -16(CX)(R9*1)
 
 memmove_end_copy_match_emit_encodeBetterBlockAsm8B:
-	MOVQ BX, AX
+	MOVQ SI, CX
 	JMP  emit_literal_done_match_emit_encodeBetterBlockAsm8B
 
 memmove_long_match_emit_encodeBetterBlockAsm8B:
-	LEAQ (AX)(R8*1), BX
+	LEAQ (CX)(R9*1), SI
 
 	// genMemMoveLong
-	MOVOU (R9), X0
-	MOVOU 16(R9), X1
-	MOVOU -32(R9)(R8*1), X2
-	MOVOU -16(R9)(R8*1), X3
-	MOVQ  R8, R12
-	SHRQ  $0x05, R12
-	MOVQ  AX, R10
-	ANDL  $0x0000001f, R10
-	MOVQ  $0x00000040, R13
-	SUBQ  R10, R13
-	DECQ  R12
+	MOVOU (R10), X0
+	MOVOU 16(R10), X1
+	MOVOU -32(R10)(R9*1), X2
+	MOVOU -16(R10)(R9*1), X3
+	MOVQ  R9, R13
+	SHRQ  $0x05, R13
+	MOVQ  CX, R11
+	ANDL  $0x0000001f, R11
+	MOVQ  $0x00000040, R14
+	SUBQ  R11, R14
+	DECQ  R13
 	JA    emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_forward_sse_loop_32
-	LEAQ  -32(R9)(R13*1), R10
-	LEAQ  -32(AX)(R13*1), R14
+	LEAQ  -32(R10)(R14*1), R11
+	LEAQ  -32(CX)(R14*1), R15
 
 emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_big_loop_back:
-	MOVOU (R10), X4
-	MOVOU 16(R10), X5
-	MOVOA X4, (R14)
-	MOVOA X5, 16(R14)
+	MOVOU (R11), X4
+	MOVOU 16(R11), X5
+	MOVOA X4, (R15)
+	MOVOA X5, 16(R15)
+	ADDQ  $0x20, R15
+	ADDQ  $0x20, R11
 	ADDQ  $0x20, R14
-	ADDQ  $0x20, R10
-	ADDQ  $0x20, R13
-	DECQ  R12
+	DECQ  R13
 	JNA   emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_big_loop_back
 
 emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_forward_sse_loop_32:
-	MOVOU -32(R9)(R13*1), X4
-	MOVOU -16(R9)(R13*1), X5
-	MOVOA X4, -32(AX)(R13*1)
-	MOVOA X5, -16(AX)(R13*1)
-	ADDQ  $0x20, R13
-	CMPQ  R8, R13
+	MOVOU -32(R10)(R14*1), X4
+	MOVOU -16(R10)(R14*1), X5
+	MOVOA X4, -32(CX)(R14*1)
+	MOVOA X5, -16(CX)(R14*1)
+	ADDQ  $0x20, R14
+	CMPQ  R9, R14
 	JAE   emit_lit_memmove_long_match_emit_encodeBetterBlockAsm8Blarge_forward_sse_loop_32
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(R8*1)
-	MOVOU X3, -16(AX)(R8*1)
-	MOVQ  BX, AX
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R9*1)
+	MOVOU X3, -16(CX)(R9*1)
+	MOVQ  SI, CX
 
 emit_literal_done_match_emit_encodeBetterBlockAsm8B:
-	ADDL R11, CX
-	ADDL $0x04, R11
-	MOVL CX, 12(SP)
+	ADDL R12, DX
+	ADDL $0x04, R12
+	MOVL DX, 12(SP)
 
 	// emitCopy
-	CMPL R11, $0x40
+	CMPL R12, $0x40
 	JBE  two_byte_offset_short_match_nolit_encodeBetterBlockAsm8B
-	CMPL DI, $0x00000800
+	CMPL R8, $0x00000800
 	JAE  long_offset_short_match_nolit_encodeBetterBlockAsm8B
-	MOVL $0x00000001, BX
-	LEAL 16(BX), BX
-	MOVB DI, 1(AX)
-	SHRL $0x08, DI
-	SHLL $0x05, DI
-	ORL  DI, BX
-	MOVB BL, (AX)
-	ADDQ $0x02, AX
-	SUBL $0x08, R11
+	MOVL $0x00000001, SI
+	LEAL 16(SI), SI
+	MOVB R8, 1(CX)
+	SHRL $0x08, R8
+	SHLL $0x05, R8
+	ORL  R8, SI
+	MOVB SI, (CX)
+	ADDQ $0x02, CX
+	SUBL $0x08, R12
 
 	// emitRepeat
-	LEAL -4(R11), R11
+	LEAL -4(R12), R12
 	JMP  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b
-	MOVL R11, BX
-	LEAL -4(R11), R11
-	CMPL BX, $0x08
+	MOVL R12, SI
+	LEAL -4(R12), R12
+	CMPL SI, $0x08
 	JBE  repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b
-	CMPL BX, $0x0c
+	CMPL SI, $0x0c
 	JAE  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b
 
 cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b:
-	CMPL R11, $0x00000104
+	CMPL R12, $0x00000104
 	JB   repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b
-	LEAL -256(R11), R11
-	MOVW $0x0019, (AX)
-	MOVW R11, 2(AX)
-	ADDQ $0x04, AX
+	LEAL -256(R12), R12
+	MOVW $0x0019, (CX)
+	MOVW R12, 2(CX)
+	ADDQ $0x04, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm8B
 
 repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b:
-	LEAL -4(R11), R11
-	MOVW $0x0015, (AX)
-	MOVB R11, 2(AX)
-	ADDQ $0x03, AX
+	LEAL -4(R12), R12
+	MOVW $0x0015, (CX)
+	MOVB R12, 2(CX)
+	ADDQ $0x03, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm8B
 
 repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short_2b:
-	SHLL $0x02, R11
-	ORL  $0x01, R11
-	MOVW R11, (AX)
-	ADDQ $0x02, AX
+	SHLL $0x02, R12
+	ORL  $0x01, R12
+	MOVW R12, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm8B
-	XORQ BX, BX
-	LEAL 1(BX)(R11*4), R11
-	MOVB DI, 1(AX)
-	SARL $0x08, DI
-	SHLL $0x05, DI
-	ORL  DI, R11
-	MOVB R11, (AX)
-	ADDQ $0x02, AX
+	XORQ SI, SI
+	LEAL 1(SI)(R12*4), R12
+	MOVB R8, 1(CX)
+	SARL $0x08, R8
+	SHLL $0x05, R8
+	ORL  R8, R12
+	MOVB R12, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm8B
 
 long_offset_short_match_nolit_encodeBetterBlockAsm8B:
-	MOVB $0xee, (AX)
-	MOVW DI, 1(AX)
-	LEAL -60(R11), R11
-	ADDQ $0x03, AX
+	MOVB $0xee, (CX)
+	MOVW R8, 1(CX)
+	LEAL -60(R12), R12
+	ADDQ $0x03, CX
 
 	// emitRepeat
-	MOVL R11, BX
-	LEAL -4(R11), R11
-	CMPL BX, $0x08
+	MOVL R12, SI
+	LEAL -4(R12), R12
+	CMPL SI, $0x08
 	JBE  repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short
-	CMPL BX, $0x0c
+	CMPL SI, $0x0c
 	JAE  cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short
 
 cant_repeat_two_offset_match_nolit_encodeBetterBlockAsm8B_emit_copy_short:
-	CMPL R11, $0x00000104
+	CMPL R12, $0x00000104
 	JB   repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short
-	LEAL -256(R11), R11
-	MOVW $0x0019, (AX)
-	MOVW R11, 2(AX)
-	ADDQ $0x04, AX
+	LEAL -256(R12), R12
+	MOVW $0x0019, (CX)
+	MOVW R12, 2(CX)
+	ADDQ $0x04, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm8B
 
 repeat_three_match_nolit_encodeBetterBlockAsm8B_emit_copy_short:
-	LEAL -4(R11), R11
-	MOVW $0x0015, (AX)
-	MOVB R11, 2(AX)
-	ADDQ $0x03, AX
+	LEAL -4(R12), R12
+	MOVW $0x0015, (CX)
+	MOVB R12, 2(CX)
+	ADDQ $0x03, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm8B
 
-repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short:
-	SHLL $0x02, R11
-	ORL  $0x01, R11
-	MOVW R11, (AX)
-	ADDQ $0x02, AX
-	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm8B
-	XORQ BX, BX
-	LEAL 1(BX)(R11*4), R11
-	MOVB DI, 1(AX)
-	SARL $0x08, DI
-	SHLL $0x05, DI
-	ORL  DI, R11
-	MOVB R11, (AX)
-	ADDQ $0x02, AX
+repeat_two_match_nolit_encodeBetterBlockAsm8B_emit_copy_short:
+	SHLL $0x02, R12
+	ORL  $0x01, R12
+	MOVW R12, (CX)
+	ADDQ $0x02, CX
+	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm8B
+	XORQ SI, SI
+	LEAL 1(SI)(R12*4), R12
+	MOVB R8, 1(CX)
+	SARL $0x08, R8
+	SHLL $0x05, R8
+	ORL  R8, R12
+	MOVB R12, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm8B
 
 two_byte_offset_short_match_nolit_encodeBetterBlockAsm8B:
-	MOVL R11, BX
-	SHLL $0x02, BX
-	CMPL R11, $0x0c
+	MOVL R12, SI
+	SHLL $0x02, SI
+	CMPL R12, $0x0c
 	JAE  emit_copy_three_match_nolit_encodeBetterBlockAsm8B
-	LEAL -15(BX), BX
-	MOVB DI, 1(AX)
-	SHRL $0x08, DI
-	SHLL $0x05, DI
-	ORL  DI, BX
-	MOVB BL, (AX)
-	ADDQ $0x02, AX
+	LEAL -15(SI), SI
+	MOVB R8, 1(CX)
+	SHRL $0x08, R8
+	SHLL $0x05, R8
+	ORL  R8, SI
+	MOVB SI, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm8B
 
 emit_copy_three_match_nolit_encodeBetterBlockAsm8B:
-	LEAL -2(BX), BX
-	MOVB BL, (AX)
-	MOVW DI, 1(AX)
-	ADDQ $0x03, AX
+	LEAL -2(SI), SI
+	MOVB SI, (CX)
+	MOVW R8, 1(CX)
+	ADDQ $0x03, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm8B
 
 match_is_repeat_encodeBetterBlockAsm8B:
-	MOVL 12(SP), BX
-	CMPL BX, SI
+	MOVL 12(SP), SI
+	CMPL SI, DI
 	JEQ  emit_literal_done_match_emit_repeat_encodeBetterBlockAsm8B
-	MOVL SI, DI
-	MOVL SI, 12(SP)
-	LEAQ (DX)(BX*1), R8
-	SUBL BX, DI
-	LEAL -1(DI), BX
-	CMPL BX, $0x3c
+	MOVL DI, R8
+	MOVL DI, 12(SP)
+	LEAQ (BX)(SI*1), R9
+	SUBL SI, R8
+	LEAL -1(R8), SI
+	CMPL SI, $0x3c
 	JB   one_byte_match_emit_repeat_encodeBetterBlockAsm8B
-	CMPL BX, $0x00000100
+	CMPL SI, $0x00000100
 	JB   two_bytes_match_emit_repeat_encodeBetterBlockAsm8B
 	JB   three_bytes_match_emit_repeat_encodeBetterBlockAsm8B
 
 three_bytes_match_emit_repeat_encodeBetterBlockAsm8B:
-	MOVB $0xf4, (AX)
-	MOVW BX, 1(AX)
-	ADDQ $0x03, AX
+	MOVB $0xf4, (CX)
+	MOVW SI, 1(CX)
+	ADDQ $0x03, CX
 	JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm8B
 
 two_bytes_match_emit_repeat_encodeBetterBlockAsm8B:
-	MOVB $0xf0, (AX)
-	MOVB BL, 1(AX)
-	ADDQ $0x02, AX
-	CMPL BX, $0x40
+	MOVB $0xf0, (CX)
+	MOVB SI, 1(CX)
+	ADDQ $0x02, CX
+	CMPL SI, $0x40
 	JB   memmove_match_emit_repeat_encodeBetterBlockAsm8B
 	JMP  memmove_long_match_emit_repeat_encodeBetterBlockAsm8B
 
 one_byte_match_emit_repeat_encodeBetterBlockAsm8B:
-	SHLB $0x02, BL
-	MOVB BL, (AX)
-	ADDQ $0x01, AX
+	SHLB $0x02, SI
+	MOVB SI, (CX)
+	ADDQ $0x01, CX
 
 memmove_match_emit_repeat_encodeBetterBlockAsm8B:
-	LEAQ (AX)(DI*1), BX
+	LEAQ (CX)(R8*1), SI
 
 	// genMemMoveShort
-	CMPQ DI, $0x04
+	CMPQ R8, $0x04
 	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4
-	CMPQ DI, $0x08
+	CMPQ R8, $0x08
 	JB   emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4through7
-	CMPQ DI, $0x10
+	CMPQ R8, $0x10
 	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_8through16
-	CMPQ DI, $0x20
+	CMPQ R8, $0x20
 	JBE  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_17through32
 	JMP  emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_33through64
 
 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4:
-	MOVL (R8), R9
-	MOVL R9, (AX)
+	MOVL (R9), R10
+	MOVL R10, (CX)
 	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B
 
 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_4through7:
-	MOVL (R8), R9
-	MOVL -4(R8)(DI*1), R8
-	MOVL R9, (AX)
-	MOVL R8, -4(AX)(DI*1)
+	MOVL (R9), R10
+	MOVL -4(R9)(R8*1), R9
+	MOVL R10, (CX)
+	MOVL R9, -4(CX)(R8*1)
 	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B
 
 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_8through16:
-	MOVQ (R8), R9
-	MOVQ -8(R8)(DI*1), R8
-	MOVQ R9, (AX)
-	MOVQ R8, -8(AX)(DI*1)
+	MOVQ (R9), R10
+	MOVQ -8(R9)(R8*1), R9
+	MOVQ R10, (CX)
+	MOVQ R9, -8(CX)(R8*1)
 	JMP  memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B
 
 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_17through32:
-	MOVOU (R8), X0
-	MOVOU -16(R8)(DI*1), X1
-	MOVOU X0, (AX)
-	MOVOU X1, -16(AX)(DI*1)
+	MOVOU (R9), X0
+	MOVOU -16(R9)(R8*1), X1
+	MOVOU X0, (CX)
+	MOVOU X1, -16(CX)(R8*1)
 	JMP   memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B
 
 emit_lit_memmove_match_emit_repeat_encodeBetterBlockAsm8B_memmove_move_33through64:
-	MOVOU (R8), X0
-	MOVOU 16(R8), X1
-	MOVOU -32(R8)(DI*1), X2
-	MOVOU -16(R8)(DI*1), X3
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(DI*1)
-	MOVOU X3, -16(AX)(DI*1)
+	MOVOU (R9), X0
+	MOVOU 16(R9), X1
+	MOVOU -32(R9)(R8*1), X2
+	MOVOU -16(R9)(R8*1), X3
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R8*1)
+	MOVOU X3, -16(CX)(R8*1)
 
 memmove_end_copy_match_emit_repeat_encodeBetterBlockAsm8B:
-	MOVQ BX, AX
+	MOVQ SI, CX
 	JMP  emit_literal_done_match_emit_repeat_encodeBetterBlockAsm8B
 
 memmove_long_match_emit_repeat_encodeBetterBlockAsm8B:
-	LEAQ (AX)(DI*1), BX
+	LEAQ (CX)(R8*1), SI
 
 	// genMemMoveLong
-	MOVOU (R8), X0
-	MOVOU 16(R8), X1
-	MOVOU -32(R8)(DI*1), X2
-	MOVOU -16(R8)(DI*1), X3
-	MOVQ  DI, R10
-	SHRQ  $0x05, R10
-	MOVQ  AX, R9
-	ANDL  $0x0000001f, R9
-	MOVQ  $0x00000040, R12
-	SUBQ  R9, R12
-	DECQ  R10
+	MOVOU (R9), X0
+	MOVOU 16(R9), X1
+	MOVOU -32(R9)(R8*1), X2
+	MOVOU -16(R9)(R8*1), X3
+	MOVQ  R8, R11
+	SHRQ  $0x05, R11
+	MOVQ  CX, R10
+	ANDL  $0x0000001f, R10
+	MOVQ  $0x00000040, R13
+	SUBQ  R10, R13
+	DECQ  R11
 	JA    emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_forward_sse_loop_32
-	LEAQ  -32(R8)(R12*1), R9
-	LEAQ  -32(AX)(R12*1), R13
+	LEAQ  -32(R9)(R13*1), R10
+	LEAQ  -32(CX)(R13*1), R14
 
 emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_big_loop_back:
-	MOVOU (R9), X4
-	MOVOU 16(R9), X5
-	MOVOA X4, (R13)
-	MOVOA X5, 16(R13)
+	MOVOU (R10), X4
+	MOVOU 16(R10), X5
+	MOVOA X4, (R14)
+	MOVOA X5, 16(R14)
+	ADDQ  $0x20, R14
+	ADDQ  $0x20, R10
 	ADDQ  $0x20, R13
-	ADDQ  $0x20, R9
-	ADDQ  $0x20, R12
-	DECQ  R10
+	DECQ  R11
 	JNA   emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_big_loop_back
 
 emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_forward_sse_loop_32:
-	MOVOU -32(R8)(R12*1), X4
-	MOVOU -16(R8)(R12*1), X5
-	MOVOA X4, -32(AX)(R12*1)
-	MOVOA X5, -16(AX)(R12*1)
-	ADDQ  $0x20, R12
-	CMPQ  DI, R12
+	MOVOU -32(R9)(R13*1), X4
+	MOVOU -16(R9)(R13*1), X5
+	MOVOA X4, -32(CX)(R13*1)
+	MOVOA X5, -16(CX)(R13*1)
+	ADDQ  $0x20, R13
+	CMPQ  R8, R13
 	JAE   emit_lit_memmove_long_match_emit_repeat_encodeBetterBlockAsm8Blarge_forward_sse_loop_32
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(DI*1)
-	MOVOU X3, -16(AX)(DI*1)
-	MOVQ  BX, AX
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R8*1)
+	MOVOU X3, -16(CX)(R8*1)
+	MOVQ  SI, CX
 
 emit_literal_done_match_emit_repeat_encodeBetterBlockAsm8B:
-	ADDL R11, CX
-	ADDL $0x04, R11
-	MOVL CX, 12(SP)
+	ADDL R12, DX
+	ADDL $0x04, R12
+	MOVL DX, 12(SP)
 
 	// emitRepeat
-	MOVL R11, BX
-	LEAL -4(R11), R11
-	CMPL BX, $0x08
+	MOVL R12, SI
+	LEAL -4(R12), R12
+	CMPL SI, $0x08
 	JBE  repeat_two_match_nolit_repeat_encodeBetterBlockAsm8B
-	CMPL BX, $0x0c
+	CMPL SI, $0x0c
 	JAE  cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm8B
 
 cant_repeat_two_offset_match_nolit_repeat_encodeBetterBlockAsm8B:
-	CMPL R11, $0x00000104
+	CMPL R12, $0x00000104
 	JB   repeat_three_match_nolit_repeat_encodeBetterBlockAsm8B
-	LEAL -256(R11), R11
-	MOVW $0x0019, (AX)
-	MOVW R11, 2(AX)
-	ADDQ $0x04, AX
+	LEAL -256(R12), R12
+	MOVW $0x0019, (CX)
+	MOVW R12, 2(CX)
+	ADDQ $0x04, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm8B
 
 repeat_three_match_nolit_repeat_encodeBetterBlockAsm8B:
-	LEAL -4(R11), R11
-	MOVW $0x0015, (AX)
-	MOVB R11, 2(AX)
-	ADDQ $0x03, AX
+	LEAL -4(R12), R12
+	MOVW $0x0015, (CX)
+	MOVB R12, 2(CX)
+	ADDQ $0x03, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm8B
 
 repeat_two_match_nolit_repeat_encodeBetterBlockAsm8B:
-	SHLL $0x02, R11
-	ORL  $0x01, R11
-	MOVW R11, (AX)
-	ADDQ $0x02, AX
+	SHLL $0x02, R12
+	ORL  $0x01, R12
+	MOVW R12, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeBetterBlockAsm8B
-	XORQ BX, BX
-	LEAL 1(BX)(R11*4), R11
-	MOVB DI, 1(AX)
-	SARL $0x08, DI
-	SHLL $0x05, DI
-	ORL  DI, R11
-	MOVB R11, (AX)
-	ADDQ $0x02, AX
+	XORQ SI, SI
+	LEAL 1(SI)(R12*4), R12
+	MOVB R8, 1(CX)
+	SARL $0x08, R8
+	SHLL $0x05, R8
+	ORL  R8, R12
+	MOVB R12, (CX)
+	ADDQ $0x02, CX
 
 match_nolit_emitcopy_end_encodeBetterBlockAsm8B:
-	CMPL CX, 8(SP)
+	CMPL DX, 8(SP)
 	JAE  emit_remainder_encodeBetterBlockAsm8B
-	CMPQ AX, (SP)
+	CMPQ CX, (SP)
 	JB   match_nolit_dst_ok_encodeBetterBlockAsm8B
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 match_nolit_dst_ok_encodeBetterBlockAsm8B:
-	MOVQ  $0x0000cf1bbcdcbf9b, BX
-	MOVQ  $0x9e3779b1, DI
-	LEAQ  1(SI), SI
-	LEAQ  -2(CX), R8
-	MOVQ  (DX)(SI*1), R9
-	MOVQ  1(DX)(SI*1), R10
-	MOVQ  (DX)(R8*1), R11
-	MOVQ  1(DX)(R8*1), R12
-	SHLQ  $0x10, R9
-	IMULQ BX, R9
-	SHRQ  $0x36, R9
-	SHLQ  $0x20, R10
-	IMULQ DI, R10
-	SHRQ  $0x38, R10
-	SHLQ  $0x10, R11
-	IMULQ BX, R11
-	SHRQ  $0x36, R11
-	SHLQ  $0x20, R12
-	IMULQ DI, R12
-	SHRQ  $0x38, R12
-	LEAQ  1(SI), DI
-	LEAQ  1(R8), R13
-	MOVL  SI, 24(SP)(R9*4)
-	MOVL  R8, 24(SP)(R11*4)
-	MOVL  DI, 4120(SP)(R10*4)
-	MOVL  R13, 4120(SP)(R12*4)
-	LEAQ  1(R8)(SI*1), DI
-	SHRQ  $0x01, DI
-	ADDQ  $0x01, SI
-	SUBQ  $0x01, R8
+	MOVQ  $0x0000cf1bbcdcbf9b, SI
+	MOVQ  $0x9e3779b1, R8
+	LEAQ  1(DI), DI
+	LEAQ  -2(DX), R9
+	MOVQ  (BX)(DI*1), R10
+	MOVQ  1(BX)(DI*1), R11
+	MOVQ  (BX)(R9*1), R12
+	MOVQ  1(BX)(R9*1), R13
+	SHLQ  $0x10, R10
+	IMULQ SI, R10
+	SHRQ  $0x36, R10
+	SHLQ  $0x20, R11
+	IMULQ R8, R11
+	SHRQ  $0x38, R11
+	SHLQ  $0x10, R12
+	IMULQ SI, R12
+	SHRQ  $0x36, R12
+	SHLQ  $0x20, R13
+	IMULQ R8, R13
+	SHRQ  $0x38, R13
+	LEAQ  1(DI), R8
+	LEAQ  1(R9), R14
+	MOVL  DI, (AX)(R10*4)
+	MOVL  R9, (AX)(R12*4)
+	MOVL  R8, 4096(AX)(R11*4)
+	MOVL  R14, 4096(AX)(R13*4)
+	LEAQ  1(R9)(DI*1), R8
+	SHRQ  $0x01, R8
+	ADDQ  $0x01, DI
+	SUBQ  $0x01, R9
 
 index_loop_encodeBetterBlockAsm8B:
-	CMPQ  DI, R8
+	CMPQ  R8, R9
 	JAE   search_loop_encodeBetterBlockAsm8B
-	MOVQ  (DX)(SI*1), R9
-	MOVQ  (DX)(DI*1), R10
-	SHLQ  $0x10, R9
-	IMULQ BX, R9
-	SHRQ  $0x36, R9
+	MOVQ  (BX)(DI*1), R10
+	MOVQ  (BX)(R8*1), R11
 	SHLQ  $0x10, R10
-	IMULQ BX, R10
+	IMULQ SI, R10
 	SHRQ  $0x36, R10
-	MOVL  SI, 24(SP)(R9*4)
-	MOVL  DI, 24(SP)(R10*4)
-	ADDQ  $0x02, SI
+	SHLQ  $0x10, R11
+	IMULQ SI, R11
+	SHRQ  $0x36, R11
+	MOVL  DI, (AX)(R10*4)
+	MOVL  R8, (AX)(R11*4)
 	ADDQ  $0x02, DI
+	ADDQ  $0x02, R8
 	JMP   index_loop_encodeBetterBlockAsm8B
 
 emit_remainder_encodeBetterBlockAsm8B:
-	MOVQ src_len+32(FP), CX
-	SUBL 12(SP), CX
-	LEAQ 3(AX)(CX*1), CX
-	CMPQ CX, (SP)
+	MOVQ src_len+32(FP), AX
+	SUBL 12(SP), AX
+	LEAQ 3(CX)(AX*1), AX
+	CMPQ AX, (SP)
 	JB   emit_remainder_ok_encodeBetterBlockAsm8B
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 emit_remainder_ok_encodeBetterBlockAsm8B:
-	MOVQ src_len+32(FP), CX
-	MOVL 12(SP), BX
-	CMPL BX, CX
+	MOVQ src_len+32(FP), AX
+	MOVL 12(SP), DX
+	CMPL DX, AX
 	JEQ  emit_literal_done_emit_remainder_encodeBetterBlockAsm8B
-	MOVL CX, SI
-	MOVL CX, 12(SP)
-	LEAQ (DX)(BX*1), CX
-	SUBL BX, SI
+	MOVL AX, SI
+	MOVL AX, 12(SP)
+	LEAQ (BX)(DX*1), AX
+	SUBL DX, SI
 	LEAL -1(SI), DX
 	CMPL DX, $0x3c
 	JB   one_byte_emit_remainder_encodeBetterBlockAsm8B
@@ -10580,26 +10590,26 @@ emit_remainder_ok_encodeBetterBlockAsm8B:
 	JB   three_bytes_emit_remainder_encodeBetterBlockAsm8B
 
 three_bytes_emit_remainder_encodeBetterBlockAsm8B:
-	MOVB $0xf4, (AX)
-	MOVW DX, 1(AX)
-	ADDQ $0x03, AX
+	MOVB $0xf4, (CX)
+	MOVW DX, 1(CX)
+	ADDQ $0x03, CX
 	JMP  memmove_long_emit_remainder_encodeBetterBlockAsm8B
 
 two_bytes_emit_remainder_encodeBetterBlockAsm8B:
-	MOVB $0xf0, (AX)
-	MOVB DL, 1(AX)
-	ADDQ $0x02, AX
+	MOVB $0xf0, (CX)
+	MOVB DL, 1(CX)
+	ADDQ $0x02, CX
 	CMPL DX, $0x40
 	JB   memmove_emit_remainder_encodeBetterBlockAsm8B
 	JMP  memmove_long_emit_remainder_encodeBetterBlockAsm8B
 
 one_byte_emit_remainder_encodeBetterBlockAsm8B:
 	SHLB $0x02, DL
-	MOVB DL, (AX)
-	ADDQ $0x01, AX
+	MOVB DL, (CX)
+	ADDQ $0x01, CX
 
 memmove_emit_remainder_encodeBetterBlockAsm8B:
-	LEAQ (AX)(SI*1), DX
+	LEAQ (CX)(SI*1), DX
 	MOVL SI, BX
 
 	// genMemMoveShort
@@ -10615,73 +10625,73 @@ memmove_emit_remainder_encodeBetterBlockAsm8B:
 	JMP  emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_33through64
 
 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_1or2:
-	MOVB (CX), SI
-	MOVB -1(CX)(BX*1), CL
-	MOVB SI, (AX)
-	MOVB CL, -1(AX)(BX*1)
+	MOVB (AX), SI
+	MOVB -1(AX)(BX*1), AL
+	MOVB SI, (CX)
+	MOVB AL, -1(CX)(BX*1)
 	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B
 
 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_3:
-	MOVW (CX), SI
-	MOVB 2(CX), CL
-	MOVW SI, (AX)
-	MOVB CL, 2(AX)
+	MOVW (AX), SI
+	MOVB 2(AX), AL
+	MOVW SI, (CX)
+	MOVB AL, 2(CX)
 	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B
 
 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_4through7:
-	MOVL (CX), SI
-	MOVL -4(CX)(BX*1), CX
-	MOVL SI, (AX)
-	MOVL CX, -4(AX)(BX*1)
+	MOVL (AX), SI
+	MOVL -4(AX)(BX*1), AX
+	MOVL SI, (CX)
+	MOVL AX, -4(CX)(BX*1)
 	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B
 
 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_8through16:
-	MOVQ (CX), SI
-	MOVQ -8(CX)(BX*1), CX
-	MOVQ SI, (AX)
-	MOVQ CX, -8(AX)(BX*1)
+	MOVQ (AX), SI
+	MOVQ -8(AX)(BX*1), AX
+	MOVQ SI, (CX)
+	MOVQ AX, -8(CX)(BX*1)
 	JMP  memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B
 
 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_17through32:
-	MOVOU (CX), X0
-	MOVOU -16(CX)(BX*1), X1
-	MOVOU X0, (AX)
-	MOVOU X1, -16(AX)(BX*1)
+	MOVOU (AX), X0
+	MOVOU -16(AX)(BX*1), X1
+	MOVOU X0, (CX)
+	MOVOU X1, -16(CX)(BX*1)
 	JMP   memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B
 
 emit_lit_memmove_emit_remainder_encodeBetterBlockAsm8B_memmove_move_33through64:
-	MOVOU (CX), X0
-	MOVOU 16(CX), X1
-	MOVOU -32(CX)(BX*1), X2
-	MOVOU -16(CX)(BX*1), X3
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(BX*1)
-	MOVOU X3, -16(AX)(BX*1)
+	MOVOU (AX), X0
+	MOVOU 16(AX), X1
+	MOVOU -32(AX)(BX*1), X2
+	MOVOU -16(AX)(BX*1), X3
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(BX*1)
+	MOVOU X3, -16(CX)(BX*1)
 
 memmove_end_copy_emit_remainder_encodeBetterBlockAsm8B:
-	MOVQ DX, AX
+	MOVQ DX, CX
 	JMP  emit_literal_done_emit_remainder_encodeBetterBlockAsm8B
 
 memmove_long_emit_remainder_encodeBetterBlockAsm8B:
-	LEAQ (AX)(SI*1), DX
+	LEAQ (CX)(SI*1), DX
 	MOVL SI, BX
 
 	// genMemMoveLong
-	MOVOU (CX), X0
-	MOVOU 16(CX), X1
-	MOVOU -32(CX)(BX*1), X2
-	MOVOU -16(CX)(BX*1), X3
+	MOVOU (AX), X0
+	MOVOU 16(AX), X1
+	MOVOU -32(AX)(BX*1), X2
+	MOVOU -16(AX)(BX*1), X3
 	MOVQ  BX, DI
 	SHRQ  $0x05, DI
-	MOVQ  AX, SI
+	MOVQ  CX, SI
 	ANDL  $0x0000001f, SI
 	MOVQ  $0x00000040, R8
 	SUBQ  SI, R8
 	DECQ  DI
 	JA    emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_forward_sse_loop_32
-	LEAQ  -32(CX)(R8*1), SI
-	LEAQ  -32(AX)(R8*1), R9
+	LEAQ  -32(AX)(R8*1), SI
+	LEAQ  -32(CX)(R8*1), R9
 
 emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_big_loop_back:
 	MOVOU (SI), X4
@@ -10695,798 +10705,799 @@ emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_big_loop_back:
 	JNA   emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_big_loop_back
 
 emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_forward_sse_loop_32:
-	MOVOU -32(CX)(R8*1), X4
-	MOVOU -16(CX)(R8*1), X5
-	MOVOA X4, -32(AX)(R8*1)
-	MOVOA X5, -16(AX)(R8*1)
+	MOVOU -32(AX)(R8*1), X4
+	MOVOU -16(AX)(R8*1), X5
+	MOVOA X4, -32(CX)(R8*1)
+	MOVOA X5, -16(CX)(R8*1)
 	ADDQ  $0x20, R8
 	CMPQ  BX, R8
 	JAE   emit_lit_memmove_long_emit_remainder_encodeBetterBlockAsm8Blarge_forward_sse_loop_32
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(BX*1)
-	MOVOU X3, -16(AX)(BX*1)
-	MOVQ  DX, AX
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(BX*1)
+	MOVOU X3, -16(CX)(BX*1)
+	MOVQ  DX, CX
 
 emit_literal_done_emit_remainder_encodeBetterBlockAsm8B:
-	MOVQ dst_base+0(FP), CX
-	SUBQ CX, AX
-	MOVQ AX, ret+48(FP)
+	MOVQ dst_base+0(FP), AX
+	SUBQ AX, CX
+	MOVQ CX, ret+56(FP)
 	RET
 
-// func encodeSnappyBlockAsm(dst []byte, src []byte) int
+// func encodeSnappyBlockAsm(dst []byte, src []byte, tmp *[65536]byte) int
 // Requires: BMI, SSE2
-TEXT ·encodeSnappyBlockAsm(SB), $65560-56
-	MOVQ dst_base+0(FP), AX
-	MOVQ $0x00000200, CX
-	LEAQ 24(SP), DX
+TEXT ·encodeSnappyBlockAsm(SB), $24-64
+	MOVQ tmp+48(FP), AX
+	MOVQ dst_base+0(FP), CX
+	MOVQ $0x00000200, DX
+	MOVQ AX, BX
 	PXOR X0, X0
 
 zero_loop_encodeSnappyBlockAsm:
-	MOVOU X0, (DX)
-	MOVOU X0, 16(DX)
-	MOVOU X0, 32(DX)
-	MOVOU X0, 48(DX)
-	MOVOU X0, 64(DX)
-	MOVOU X0, 80(DX)
-	MOVOU X0, 96(DX)
-	MOVOU X0, 112(DX)
-	ADDQ  $0x80, DX
-	DECQ  CX
+	MOVOU X0, (BX)
+	MOVOU X0, 16(BX)
+	MOVOU X0, 32(BX)
+	MOVOU X0, 48(BX)
+	MOVOU X0, 64(BX)
+	MOVOU X0, 80(BX)
+	MOVOU X0, 96(BX)
+	MOVOU X0, 112(BX)
+	ADDQ  $0x80, BX
+	DECQ  DX
 	JNZ   zero_loop_encodeSnappyBlockAsm
 	MOVL  $0x00000000, 12(SP)
-	MOVQ  src_len+32(FP), CX
-	LEAQ  -9(CX), DX
-	LEAQ  -8(CX), BX
-	MOVL  BX, 8(SP)
-	SHRQ  $0x05, CX
-	SUBL  CX, DX
-	LEAQ  (AX)(DX*1), DX
-	MOVQ  DX, (SP)
-	MOVL  $0x00000001, CX
-	MOVL  CX, 16(SP)
-	MOVQ  src_base+24(FP), DX
+	MOVQ  src_len+32(FP), DX
+	LEAQ  -9(DX), BX
+	LEAQ  -8(DX), SI
+	MOVL  SI, 8(SP)
+	SHRQ  $0x05, DX
+	SUBL  DX, BX
+	LEAQ  (CX)(BX*1), BX
+	MOVQ  BX, (SP)
+	MOVL  $0x00000001, DX
+	MOVL  DX, 16(SP)
+	MOVQ  src_base+24(FP), BX
 
 search_loop_encodeSnappyBlockAsm:
-	MOVL  CX, BX
-	SUBL  12(SP), BX
-	SHRL  $0x06, BX
-	LEAL  4(CX)(BX*1), BX
-	CMPL  BX, 8(SP)
+	MOVL  DX, SI
+	SUBL  12(SP), SI
+	SHRL  $0x06, SI
+	LEAL  4(DX)(SI*1), SI
+	CMPL  SI, 8(SP)
 	JAE   emit_remainder_encodeSnappyBlockAsm
-	MOVQ  (DX)(CX*1), SI
-	MOVL  BX, 20(SP)
-	MOVQ  $0x0000cf1bbcdcbf9b, R8
-	MOVQ  SI, R9
-	MOVQ  SI, R10
-	SHRQ  $0x08, R10
-	SHLQ  $0x10, R9
-	IMULQ R8, R9
-	SHRQ  $0x32, R9
+	MOVQ  (BX)(DX*1), DI
+	MOVL  SI, 20(SP)
+	MOVQ  $0x0000cf1bbcdcbf9b, R9
+	MOVQ  DI, R10
+	MOVQ  DI, R11
+	SHRQ  $0x08, R11
 	SHLQ  $0x10, R10
-	IMULQ R8, R10
+	IMULQ R9, R10
 	SHRQ  $0x32, R10
-	MOVL  24(SP)(R9*4), BX
-	MOVL  24(SP)(R10*4), DI
-	MOVL  CX, 24(SP)(R9*4)
-	LEAL  1(CX), R9
-	MOVL  R9, 24(SP)(R10*4)
-	MOVQ  SI, R9
-	SHRQ  $0x10, R9
-	SHLQ  $0x10, R9
-	IMULQ R8, R9
-	SHRQ  $0x32, R9
-	MOVL  CX, R8
-	SUBL  16(SP), R8
-	MOVL  1(DX)(R8*1), R10
-	MOVQ  SI, R8
-	SHRQ  $0x08, R8
-	CMPL  R8, R10
+	SHLQ  $0x10, R11
+	IMULQ R9, R11
+	SHRQ  $0x32, R11
+	MOVL  (AX)(R10*4), SI
+	MOVL  (AX)(R11*4), R8
+	MOVL  DX, (AX)(R10*4)
+	LEAL  1(DX), R10
+	MOVL  R10, (AX)(R11*4)
+	MOVQ  DI, R10
+	SHRQ  $0x10, R10
+	SHLQ  $0x10, R10
+	IMULQ R9, R10
+	SHRQ  $0x32, R10
+	MOVL  DX, R9
+	SUBL  16(SP), R9
+	MOVL  1(BX)(R9*1), R11
+	MOVQ  DI, R9
+	SHRQ  $0x08, R9
+	CMPL  R9, R11
 	JNE   no_repeat_found_encodeSnappyBlockAsm
-	LEAL  1(CX), SI
-	MOVL  12(SP), BX
-	MOVL  SI, DI
-	SUBL  16(SP), DI
+	LEAL  1(DX), DI
+	MOVL  12(SP), SI
+	MOVL  DI, R8
+	SUBL  16(SP), R8
 	JZ    repeat_extend_back_end_encodeSnappyBlockAsm
 
 repeat_extend_back_loop_encodeSnappyBlockAsm:
-	CMPL SI, BX
+	CMPL DI, SI
 	JBE  repeat_extend_back_end_encodeSnappyBlockAsm
-	MOVB -1(DX)(DI*1), R8
-	MOVB -1(DX)(SI*1), R9
-	CMPB R8, R9
+	MOVB -1(BX)(R8*1), R9
+	MOVB -1(BX)(DI*1), R10
+	CMPB R9, R10
 	JNE  repeat_extend_back_end_encodeSnappyBlockAsm
-	LEAL -1(SI), SI
-	DECL DI
+	LEAL -1(DI), DI
+	DECL R8
 	JNZ  repeat_extend_back_loop_encodeSnappyBlockAsm
 
 repeat_extend_back_end_encodeSnappyBlockAsm:
-	MOVL SI, BX
-	SUBL 12(SP), BX
-	LEAQ 5(AX)(BX*1), BX
-	CMPQ BX, (SP)
+	MOVL DI, SI
+	SUBL 12(SP), SI
+	LEAQ 5(CX)(SI*1), SI
+	CMPQ SI, (SP)
 	JB   repeat_dst_size_check_encodeSnappyBlockAsm
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 repeat_dst_size_check_encodeSnappyBlockAsm:
-	MOVL 12(SP), BX
-	CMPL BX, SI
+	MOVL 12(SP), SI
+	CMPL SI, DI
 	JEQ  emit_literal_done_repeat_emit_encodeSnappyBlockAsm
-	MOVL SI, DI
-	MOVL SI, 12(SP)
-	LEAQ (DX)(BX*1), R8
-	SUBL BX, DI
-	LEAL -1(DI), BX
-	CMPL BX, $0x3c
+	MOVL DI, R8
+	MOVL DI, 12(SP)
+	LEAQ (BX)(SI*1), R9
+	SUBL SI, R8
+	LEAL -1(R8), SI
+	CMPL SI, $0x3c
 	JB   one_byte_repeat_emit_encodeSnappyBlockAsm
-	CMPL BX, $0x00000100
+	CMPL SI, $0x00000100
 	JB   two_bytes_repeat_emit_encodeSnappyBlockAsm
-	CMPL BX, $0x00010000
+	CMPL SI, $0x00010000
 	JB   three_bytes_repeat_emit_encodeSnappyBlockAsm
-	CMPL BX, $0x01000000
+	CMPL SI, $0x01000000
 	JB   four_bytes_repeat_emit_encodeSnappyBlockAsm
-	MOVB $0xfc, (AX)
-	MOVL BX, 1(AX)
-	ADDQ $0x05, AX
+	MOVB $0xfc, (CX)
+	MOVL SI, 1(CX)
+	ADDQ $0x05, CX
 	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm
 
 four_bytes_repeat_emit_encodeSnappyBlockAsm:
-	MOVL BX, R9
-	SHRL $0x10, R9
-	MOVB $0xf8, (AX)
-	MOVW BX, 1(AX)
-	MOVB R9, 3(AX)
-	ADDQ $0x04, AX
+	MOVL SI, R10
+	SHRL $0x10, R10
+	MOVB $0xf8, (CX)
+	MOVW SI, 1(CX)
+	MOVB R10, 3(CX)
+	ADDQ $0x04, CX
 	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm
 
 three_bytes_repeat_emit_encodeSnappyBlockAsm:
-	MOVB $0xf4, (AX)
-	MOVW BX, 1(AX)
-	ADDQ $0x03, AX
+	MOVB $0xf4, (CX)
+	MOVW SI, 1(CX)
+	ADDQ $0x03, CX
 	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm
 
 two_bytes_repeat_emit_encodeSnappyBlockAsm:
-	MOVB $0xf0, (AX)
-	MOVB BL, 1(AX)
-	ADDQ $0x02, AX
-	CMPL BX, $0x40
+	MOVB $0xf0, (CX)
+	MOVB SI, 1(CX)
+	ADDQ $0x02, CX
+	CMPL SI, $0x40
 	JB   memmove_repeat_emit_encodeSnappyBlockAsm
 	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm
 
 one_byte_repeat_emit_encodeSnappyBlockAsm:
-	SHLB $0x02, BL
-	MOVB BL, (AX)
-	ADDQ $0x01, AX
+	SHLB $0x02, SI
+	MOVB SI, (CX)
+	ADDQ $0x01, CX
 
 memmove_repeat_emit_encodeSnappyBlockAsm:
-	LEAQ (AX)(DI*1), BX
+	LEAQ (CX)(R8*1), SI
 
 	// genMemMoveShort
-	CMPQ DI, $0x08
+	CMPQ R8, $0x08
 	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8
-	CMPQ DI, $0x10
+	CMPQ R8, $0x10
 	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8through16
-	CMPQ DI, $0x20
+	CMPQ R8, $0x20
 	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_17through32
 	JMP  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_33through64
 
 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8:
-	MOVQ (R8), R9
-	MOVQ R9, (AX)
+	MOVQ (R9), R10
+	MOVQ R10, (CX)
 	JMP  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm
 
 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_8through16:
-	MOVQ (R8), R9
-	MOVQ -8(R8)(DI*1), R8
-	MOVQ R9, (AX)
-	MOVQ R8, -8(AX)(DI*1)
+	MOVQ (R9), R10
+	MOVQ -8(R9)(R8*1), R9
+	MOVQ R10, (CX)
+	MOVQ R9, -8(CX)(R8*1)
 	JMP  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm
 
 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_17through32:
-	MOVOU (R8), X0
-	MOVOU -16(R8)(DI*1), X1
-	MOVOU X0, (AX)
-	MOVOU X1, -16(AX)(DI*1)
+	MOVOU (R9), X0
+	MOVOU -16(R9)(R8*1), X1
+	MOVOU X0, (CX)
+	MOVOU X1, -16(CX)(R8*1)
 	JMP   memmove_end_copy_repeat_emit_encodeSnappyBlockAsm
 
 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm_memmove_move_33through64:
-	MOVOU (R8), X0
-	MOVOU 16(R8), X1
-	MOVOU -32(R8)(DI*1), X2
-	MOVOU -16(R8)(DI*1), X3
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(DI*1)
-	MOVOU X3, -16(AX)(DI*1)
+	MOVOU (R9), X0
+	MOVOU 16(R9), X1
+	MOVOU -32(R9)(R8*1), X2
+	MOVOU -16(R9)(R8*1), X3
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R8*1)
+	MOVOU X3, -16(CX)(R8*1)
 
 memmove_end_copy_repeat_emit_encodeSnappyBlockAsm:
-	MOVQ BX, AX
+	MOVQ SI, CX
 	JMP  emit_literal_done_repeat_emit_encodeSnappyBlockAsm
 
 memmove_long_repeat_emit_encodeSnappyBlockAsm:
-	LEAQ (AX)(DI*1), BX
+	LEAQ (CX)(R8*1), SI
 
 	// genMemMoveLong
-	MOVOU (R8), X0
-	MOVOU 16(R8), X1
-	MOVOU -32(R8)(DI*1), X2
-	MOVOU -16(R8)(DI*1), X3
-	MOVQ  DI, R10
-	SHRQ  $0x05, R10
-	MOVQ  AX, R9
-	ANDL  $0x0000001f, R9
-	MOVQ  $0x00000040, R11
-	SUBQ  R9, R11
-	DECQ  R10
+	MOVOU (R9), X0
+	MOVOU 16(R9), X1
+	MOVOU -32(R9)(R8*1), X2
+	MOVOU -16(R9)(R8*1), X3
+	MOVQ  R8, R11
+	SHRQ  $0x05, R11
+	MOVQ  CX, R10
+	ANDL  $0x0000001f, R10
+	MOVQ  $0x00000040, R12
+	SUBQ  R10, R12
+	DECQ  R11
 	JA    emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32
-	LEAQ  -32(R8)(R11*1), R9
-	LEAQ  -32(AX)(R11*1), R12
+	LEAQ  -32(R9)(R12*1), R10
+	LEAQ  -32(CX)(R12*1), R13
 
 emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_big_loop_back:
-	MOVOU (R9), X4
-	MOVOU 16(R9), X5
-	MOVOA X4, (R12)
-	MOVOA X5, 16(R12)
+	MOVOU (R10), X4
+	MOVOU 16(R10), X5
+	MOVOA X4, (R13)
+	MOVOA X5, 16(R13)
+	ADDQ  $0x20, R13
+	ADDQ  $0x20, R10
 	ADDQ  $0x20, R12
-	ADDQ  $0x20, R9
-	ADDQ  $0x20, R11
-	DECQ  R10
+	DECQ  R11
 	JNA   emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_big_loop_back
 
 emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32:
-	MOVOU -32(R8)(R11*1), X4
-	MOVOU -16(R8)(R11*1), X5
-	MOVOA X4, -32(AX)(R11*1)
-	MOVOA X5, -16(AX)(R11*1)
-	ADDQ  $0x20, R11
-	CMPQ  DI, R11
+	MOVOU -32(R9)(R12*1), X4
+	MOVOU -16(R9)(R12*1), X5
+	MOVOA X4, -32(CX)(R12*1)
+	MOVOA X5, -16(CX)(R12*1)
+	ADDQ  $0x20, R12
+	CMPQ  R8, R12
 	JAE   emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(DI*1)
-	MOVOU X3, -16(AX)(DI*1)
-	MOVQ  BX, AX
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R8*1)
+	MOVOU X3, -16(CX)(R8*1)
+	MOVQ  SI, CX
 
 emit_literal_done_repeat_emit_encodeSnappyBlockAsm:
-	ADDL $0x05, CX
-	MOVL CX, BX
-	SUBL 16(SP), BX
-	MOVQ src_len+32(FP), DI
-	SUBL CX, DI
-	LEAQ (DX)(CX*1), R8
-	LEAQ (DX)(BX*1), BX
+	ADDL $0x05, DX
+	MOVL DX, SI
+	SUBL 16(SP), SI
+	MOVQ src_len+32(FP), R8
+	SUBL DX, R8
+	LEAQ (BX)(DX*1), R9
+	LEAQ (BX)(SI*1), SI
 
 	// matchLen
-	XORL R10, R10
+	XORL R11, R11
 
 matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm:
-	CMPL DI, $0x10
+	CMPL R8, $0x10
 	JB   matchlen_match8_repeat_extend_encodeSnappyBlockAsm
-	MOVQ (R8)(R10*1), R9
-	MOVQ 8(R8)(R10*1), R11
-	XORQ (BX)(R10*1), R9
+	MOVQ (R9)(R11*1), R10
+	MOVQ 8(R9)(R11*1), R12
+	XORQ (SI)(R11*1), R10
 	JNZ  matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm
-	XORQ 8(BX)(R10*1), R11
+	XORQ 8(SI)(R11*1), R12
 	JNZ  matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm
-	LEAL -16(DI), DI
-	LEAL 16(R10), R10
+	LEAL -16(R8), R8
+	LEAL 16(R11), R11
 	JMP  matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm
 
 matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm:
 #ifdef GOAMD64_v3
-	TZCNTQ R11, R11
+	TZCNTQ R12, R12
 
 #else
-	BSFQ R11, R11
+	BSFQ R12, R12
 
 #endif
-	SARQ $0x03, R11
-	LEAL 8(R10)(R11*1), R10
+	SARQ $0x03, R12
+	LEAL 8(R11)(R12*1), R11
 	JMP  repeat_extend_forward_end_encodeSnappyBlockAsm
 
 matchlen_match8_repeat_extend_encodeSnappyBlockAsm:
-	CMPL DI, $0x08
+	CMPL R8, $0x08
 	JB   matchlen_match4_repeat_extend_encodeSnappyBlockAsm
-	MOVQ (R8)(R10*1), R9
-	XORQ (BX)(R10*1), R9
+	MOVQ (R9)(R11*1), R10
+	XORQ (SI)(R11*1), R10
 	JNZ  matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm
-	LEAL -8(DI), DI
-	LEAL 8(R10), R10
+	LEAL -8(R8), R8
+	LEAL 8(R11), R11
 	JMP  matchlen_match4_repeat_extend_encodeSnappyBlockAsm
 
 matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm:
 #ifdef GOAMD64_v3
-	TZCNTQ R9, R9
+	TZCNTQ R10, R10
 
 #else
-	BSFQ R9, R9
+	BSFQ R10, R10
 
 #endif
-	SARQ $0x03, R9
-	LEAL (R10)(R9*1), R10
+	SARQ $0x03, R10
+	LEAL (R11)(R10*1), R11
 	JMP  repeat_extend_forward_end_encodeSnappyBlockAsm
 
 matchlen_match4_repeat_extend_encodeSnappyBlockAsm:
-	CMPL DI, $0x04
+	CMPL R8, $0x04
 	JB   matchlen_match2_repeat_extend_encodeSnappyBlockAsm
-	MOVL (R8)(R10*1), R9
-	CMPL (BX)(R10*1), R9
+	MOVL (R9)(R11*1), R10
+	CMPL (SI)(R11*1), R10
 	JNE  matchlen_match2_repeat_extend_encodeSnappyBlockAsm
-	LEAL -4(DI), DI
-	LEAL 4(R10), R10
+	LEAL -4(R8), R8
+	LEAL 4(R11), R11
 
 matchlen_match2_repeat_extend_encodeSnappyBlockAsm:
-	CMPL DI, $0x01
+	CMPL R8, $0x01
 	JE   matchlen_match1_repeat_extend_encodeSnappyBlockAsm
 	JB   repeat_extend_forward_end_encodeSnappyBlockAsm
-	MOVW (R8)(R10*1), R9
-	CMPW (BX)(R10*1), R9
+	MOVW (R9)(R11*1), R10
+	CMPW (SI)(R11*1), R10
 	JNE  matchlen_match1_repeat_extend_encodeSnappyBlockAsm
-	LEAL 2(R10), R10
-	SUBL $0x02, DI
+	LEAL 2(R11), R11
+	SUBL $0x02, R8
 	JZ   repeat_extend_forward_end_encodeSnappyBlockAsm
 
 matchlen_match1_repeat_extend_encodeSnappyBlockAsm:
-	MOVB (R8)(R10*1), R9
-	CMPB (BX)(R10*1), R9
+	MOVB (R9)(R11*1), R10
+	CMPB (SI)(R11*1), R10
 	JNE  repeat_extend_forward_end_encodeSnappyBlockAsm
-	LEAL 1(R10), R10
+	LEAL 1(R11), R11
 
 repeat_extend_forward_end_encodeSnappyBlockAsm:
-	ADDL R10, CX
-	MOVL CX, BX
-	SUBL SI, BX
-	MOVL 16(SP), SI
+	ADDL R11, DX
+	MOVL DX, SI
+	SUBL DI, SI
+	MOVL 16(SP), DI
 
 	// emitCopy
-	CMPL SI, $0x00010000
+	CMPL DI, $0x00010000
 	JB   two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm
 
 four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm:
-	CMPL BX, $0x40
+	CMPL SI, $0x40
 	JBE  four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm
-	MOVB $0xff, (AX)
-	MOVL SI, 1(AX)
-	LEAL -64(BX), BX
-	ADDQ $0x05, AX
-	CMPL BX, $0x04
+	MOVB $0xff, (CX)
+	MOVL DI, 1(CX)
+	LEAL -64(SI), SI
+	ADDQ $0x05, CX
+	CMPL SI, $0x04
 	JB   four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm
 	JMP  four_bytes_loop_back_repeat_as_copy_encodeSnappyBlockAsm
 
 four_bytes_remain_repeat_as_copy_encodeSnappyBlockAsm:
-	TESTL BX, BX
+	TESTL SI, SI
 	JZ    repeat_end_emit_encodeSnappyBlockAsm
-	XORL  DI, DI
-	LEAL  -1(DI)(BX*4), BX
-	MOVB  BL, (AX)
-	MOVL  SI, 1(AX)
-	ADDQ  $0x05, AX
+	XORL  R8, R8
+	LEAL  -1(R8)(SI*4), SI
+	MOVB  SI, (CX)
+	MOVL  DI, 1(CX)
+	ADDQ  $0x05, CX
 	JMP   repeat_end_emit_encodeSnappyBlockAsm
 
 two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm:
-	CMPL BX, $0x40
+	CMPL SI, $0x40
 	JBE  two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm
-	MOVB $0xee, (AX)
-	MOVW SI, 1(AX)
-	LEAL -60(BX), BX
-	ADDQ $0x03, AX
+	MOVB $0xee, (CX)
+	MOVW DI, 1(CX)
+	LEAL -60(SI), SI
+	ADDQ $0x03, CX
 	JMP  two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm
 
 two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm:
-	MOVL BX, DI
-	SHLL $0x02, DI
-	CMPL BX, $0x0c
+	MOVL SI, R8
+	SHLL $0x02, R8
+	CMPL SI, $0x0c
 	JAE  emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm
-	CMPL SI, $0x00000800
+	CMPL DI, $0x00000800
 	JAE  emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm
-	LEAL -15(DI), DI
-	MOVB SI, 1(AX)
-	SHRL $0x08, SI
-	SHLL $0x05, SI
-	ORL  SI, DI
-	MOVB DI, (AX)
-	ADDQ $0x02, AX
+	LEAL -15(R8), R8
+	MOVB DI, 1(CX)
+	SHRL $0x08, DI
+	SHLL $0x05, DI
+	ORL  DI, R8
+	MOVB R8, (CX)
+	ADDQ $0x02, CX
 	JMP  repeat_end_emit_encodeSnappyBlockAsm
 
 emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm:
-	LEAL -2(DI), DI
-	MOVB DI, (AX)
-	MOVW SI, 1(AX)
-	ADDQ $0x03, AX
+	LEAL -2(R8), R8
+	MOVB R8, (CX)
+	MOVW DI, 1(CX)
+	ADDQ $0x03, CX
 
 repeat_end_emit_encodeSnappyBlockAsm:
-	MOVL CX, 12(SP)
+	MOVL DX, 12(SP)
 	JMP  search_loop_encodeSnappyBlockAsm
 
 no_repeat_found_encodeSnappyBlockAsm:
-	CMPL (DX)(BX*1), SI
+	CMPL (BX)(SI*1), DI
 	JEQ  candidate_match_encodeSnappyBlockAsm
-	SHRQ $0x08, SI
-	MOVL 24(SP)(R9*4), BX
-	LEAL 2(CX), R8
-	CMPL (DX)(DI*1), SI
+	SHRQ $0x08, DI
+	MOVL (AX)(R10*4), SI
+	LEAL 2(DX), R9
+	CMPL (BX)(R8*1), DI
 	JEQ  candidate2_match_encodeSnappyBlockAsm
-	MOVL R8, 24(SP)(R9*4)
-	SHRQ $0x08, SI
-	CMPL (DX)(BX*1), SI
+	MOVL R9, (AX)(R10*4)
+	SHRQ $0x08, DI
+	CMPL (BX)(SI*1), DI
 	JEQ  candidate3_match_encodeSnappyBlockAsm
-	MOVL 20(SP), CX
+	MOVL 20(SP), DX
 	JMP  search_loop_encodeSnappyBlockAsm
 
 candidate3_match_encodeSnappyBlockAsm:
-	ADDL $0x02, CX
+	ADDL $0x02, DX
 	JMP  candidate_match_encodeSnappyBlockAsm
 
 candidate2_match_encodeSnappyBlockAsm:
-	MOVL R8, 24(SP)(R9*4)
-	INCL CX
-	MOVL DI, BX
+	MOVL R9, (AX)(R10*4)
+	INCL DX
+	MOVL R8, SI
 
 candidate_match_encodeSnappyBlockAsm:
-	MOVL  12(SP), SI
-	TESTL BX, BX
+	MOVL  12(SP), DI
+	TESTL SI, SI
 	JZ    match_extend_back_end_encodeSnappyBlockAsm
 
 match_extend_back_loop_encodeSnappyBlockAsm:
-	CMPL CX, SI
+	CMPL DX, DI
 	JBE  match_extend_back_end_encodeSnappyBlockAsm
-	MOVB -1(DX)(BX*1), DI
-	MOVB -1(DX)(CX*1), R8
-	CMPB DI, R8
+	MOVB -1(BX)(SI*1), R8
+	MOVB -1(BX)(DX*1), R9
+	CMPB R8, R9
 	JNE  match_extend_back_end_encodeSnappyBlockAsm
-	LEAL -1(CX), CX
-	DECL BX
+	LEAL -1(DX), DX
+	DECL SI
 	JZ   match_extend_back_end_encodeSnappyBlockAsm
 	JMP  match_extend_back_loop_encodeSnappyBlockAsm
 
 match_extend_back_end_encodeSnappyBlockAsm:
-	MOVL CX, SI
-	SUBL 12(SP), SI
-	LEAQ 5(AX)(SI*1), SI
-	CMPQ SI, (SP)
+	MOVL DX, DI
+	SUBL 12(SP), DI
+	LEAQ 5(CX)(DI*1), DI
+	CMPQ DI, (SP)
 	JB   match_dst_size_check_encodeSnappyBlockAsm
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 match_dst_size_check_encodeSnappyBlockAsm:
-	MOVL CX, SI
-	MOVL 12(SP), DI
-	CMPL DI, SI
+	MOVL DX, DI
+	MOVL 12(SP), R8
+	CMPL R8, DI
 	JEQ  emit_literal_done_match_emit_encodeSnappyBlockAsm
-	MOVL SI, R8
-	MOVL SI, 12(SP)
-	LEAQ (DX)(DI*1), SI
-	SUBL DI, R8
-	LEAL -1(R8), DI
-	CMPL DI, $0x3c
+	MOVL DI, R9
+	MOVL DI, 12(SP)
+	LEAQ (BX)(R8*1), DI
+	SUBL R8, R9
+	LEAL -1(R9), R8
+	CMPL R8, $0x3c
 	JB   one_byte_match_emit_encodeSnappyBlockAsm
-	CMPL DI, $0x00000100
+	CMPL R8, $0x00000100
 	JB   two_bytes_match_emit_encodeSnappyBlockAsm
-	CMPL DI, $0x00010000
+	CMPL R8, $0x00010000
 	JB   three_bytes_match_emit_encodeSnappyBlockAsm
-	CMPL DI, $0x01000000
+	CMPL R8, $0x01000000
 	JB   four_bytes_match_emit_encodeSnappyBlockAsm
-	MOVB $0xfc, (AX)
-	MOVL DI, 1(AX)
-	ADDQ $0x05, AX
+	MOVB $0xfc, (CX)
+	MOVL R8, 1(CX)
+	ADDQ $0x05, CX
 	JMP  memmove_long_match_emit_encodeSnappyBlockAsm
 
 four_bytes_match_emit_encodeSnappyBlockAsm:
-	MOVL DI, R9
-	SHRL $0x10, R9
-	MOVB $0xf8, (AX)
-	MOVW DI, 1(AX)
-	MOVB R9, 3(AX)
-	ADDQ $0x04, AX
+	MOVL R8, R10
+	SHRL $0x10, R10
+	MOVB $0xf8, (CX)
+	MOVW R8, 1(CX)
+	MOVB R10, 3(CX)
+	ADDQ $0x04, CX
 	JMP  memmove_long_match_emit_encodeSnappyBlockAsm
 
 three_bytes_match_emit_encodeSnappyBlockAsm:
-	MOVB $0xf4, (AX)
-	MOVW DI, 1(AX)
-	ADDQ $0x03, AX
+	MOVB $0xf4, (CX)
+	MOVW R8, 1(CX)
+	ADDQ $0x03, CX
 	JMP  memmove_long_match_emit_encodeSnappyBlockAsm
 
 two_bytes_match_emit_encodeSnappyBlockAsm:
-	MOVB $0xf0, (AX)
-	MOVB DI, 1(AX)
-	ADDQ $0x02, AX
-	CMPL DI, $0x40
+	MOVB $0xf0, (CX)
+	MOVB R8, 1(CX)
+	ADDQ $0x02, CX
+	CMPL R8, $0x40
 	JB   memmove_match_emit_encodeSnappyBlockAsm
 	JMP  memmove_long_match_emit_encodeSnappyBlockAsm
 
 one_byte_match_emit_encodeSnappyBlockAsm:
-	SHLB $0x02, DI
-	MOVB DI, (AX)
-	ADDQ $0x01, AX
+	SHLB $0x02, R8
+	MOVB R8, (CX)
+	ADDQ $0x01, CX
 
 memmove_match_emit_encodeSnappyBlockAsm:
-	LEAQ (AX)(R8*1), DI
+	LEAQ (CX)(R9*1), R8
 
 	// genMemMoveShort
-	CMPQ R8, $0x08
+	CMPQ R9, $0x08
 	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8
-	CMPQ R8, $0x10
+	CMPQ R9, $0x10
 	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8through16
-	CMPQ R8, $0x20
+	CMPQ R9, $0x20
 	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_17through32
 	JMP  emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_33through64
 
 emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8:
-	MOVQ (SI), R9
-	MOVQ R9, (AX)
+	MOVQ (DI), R10
+	MOVQ R10, (CX)
 	JMP  memmove_end_copy_match_emit_encodeSnappyBlockAsm
 
 emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_8through16:
-	MOVQ (SI), R9
-	MOVQ -8(SI)(R8*1), SI
-	MOVQ R9, (AX)
-	MOVQ SI, -8(AX)(R8*1)
+	MOVQ (DI), R10
+	MOVQ -8(DI)(R9*1), DI
+	MOVQ R10, (CX)
+	MOVQ DI, -8(CX)(R9*1)
 	JMP  memmove_end_copy_match_emit_encodeSnappyBlockAsm
 
 emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_17through32:
-	MOVOU (SI), X0
-	MOVOU -16(SI)(R8*1), X1
-	MOVOU X0, (AX)
-	MOVOU X1, -16(AX)(R8*1)
+	MOVOU (DI), X0
+	MOVOU -16(DI)(R9*1), X1
+	MOVOU X0, (CX)
+	MOVOU X1, -16(CX)(R9*1)
 	JMP   memmove_end_copy_match_emit_encodeSnappyBlockAsm
 
 emit_lit_memmove_match_emit_encodeSnappyBlockAsm_memmove_move_33through64:
-	MOVOU (SI), X0
-	MOVOU 16(SI), X1
-	MOVOU -32(SI)(R8*1), X2
-	MOVOU -16(SI)(R8*1), X3
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(R8*1)
-	MOVOU X3, -16(AX)(R8*1)
+	MOVOU (DI), X0
+	MOVOU 16(DI), X1
+	MOVOU -32(DI)(R9*1), X2
+	MOVOU -16(DI)(R9*1), X3
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R9*1)
+	MOVOU X3, -16(CX)(R9*1)
 
 memmove_end_copy_match_emit_encodeSnappyBlockAsm:
-	MOVQ DI, AX
+	MOVQ R8, CX
 	JMP  emit_literal_done_match_emit_encodeSnappyBlockAsm
 
 memmove_long_match_emit_encodeSnappyBlockAsm:
-	LEAQ (AX)(R8*1), DI
+	LEAQ (CX)(R9*1), R8
 
 	// genMemMoveLong
-	MOVOU (SI), X0
-	MOVOU 16(SI), X1
-	MOVOU -32(SI)(R8*1), X2
-	MOVOU -16(SI)(R8*1), X3
-	MOVQ  R8, R10
-	SHRQ  $0x05, R10
-	MOVQ  AX, R9
-	ANDL  $0x0000001f, R9
-	MOVQ  $0x00000040, R11
-	SUBQ  R9, R11
-	DECQ  R10
+	MOVOU (DI), X0
+	MOVOU 16(DI), X1
+	MOVOU -32(DI)(R9*1), X2
+	MOVOU -16(DI)(R9*1), X3
+	MOVQ  R9, R11
+	SHRQ  $0x05, R11
+	MOVQ  CX, R10
+	ANDL  $0x0000001f, R10
+	MOVQ  $0x00000040, R12
+	SUBQ  R10, R12
+	DECQ  R11
 	JA    emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32
-	LEAQ  -32(SI)(R11*1), R9
-	LEAQ  -32(AX)(R11*1), R12
+	LEAQ  -32(DI)(R12*1), R10
+	LEAQ  -32(CX)(R12*1), R13
 
 emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_big_loop_back:
-	MOVOU (R9), X4
-	MOVOU 16(R9), X5
-	MOVOA X4, (R12)
-	MOVOA X5, 16(R12)
+	MOVOU (R10), X4
+	MOVOU 16(R10), X5
+	MOVOA X4, (R13)
+	MOVOA X5, 16(R13)
+	ADDQ  $0x20, R13
+	ADDQ  $0x20, R10
 	ADDQ  $0x20, R12
-	ADDQ  $0x20, R9
-	ADDQ  $0x20, R11
-	DECQ  R10
+	DECQ  R11
 	JNA   emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_big_loop_back
 
 emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32:
-	MOVOU -32(SI)(R11*1), X4
-	MOVOU -16(SI)(R11*1), X5
-	MOVOA X4, -32(AX)(R11*1)
-	MOVOA X5, -16(AX)(R11*1)
-	ADDQ  $0x20, R11
-	CMPQ  R8, R11
+	MOVOU -32(DI)(R12*1), X4
+	MOVOU -16(DI)(R12*1), X5
+	MOVOA X4, -32(CX)(R12*1)
+	MOVOA X5, -16(CX)(R12*1)
+	ADDQ  $0x20, R12
+	CMPQ  R9, R12
 	JAE   emit_lit_memmove_long_match_emit_encodeSnappyBlockAsmlarge_forward_sse_loop_32
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(R8*1)
-	MOVOU X3, -16(AX)(R8*1)
-	MOVQ  DI, AX
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R9*1)
+	MOVOU X3, -16(CX)(R9*1)
+	MOVQ  R8, CX
 
 emit_literal_done_match_emit_encodeSnappyBlockAsm:
 match_nolit_loop_encodeSnappyBlockAsm:
-	MOVL CX, SI
-	SUBL BX, SI
-	MOVL SI, 16(SP)
-	ADDL $0x04, CX
-	ADDL $0x04, BX
-	MOVQ src_len+32(FP), SI
-	SUBL CX, SI
-	LEAQ (DX)(CX*1), DI
-	LEAQ (DX)(BX*1), BX
+	MOVL DX, DI
+	SUBL SI, DI
+	MOVL DI, 16(SP)
+	ADDL $0x04, DX
+	ADDL $0x04, SI
+	MOVQ src_len+32(FP), DI
+	SUBL DX, DI
+	LEAQ (BX)(DX*1), R8
+	LEAQ (BX)(SI*1), SI
 
 	// matchLen
-	XORL R9, R9
+	XORL R10, R10
 
 matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm:
-	CMPL SI, $0x10
+	CMPL DI, $0x10
 	JB   matchlen_match8_match_nolit_encodeSnappyBlockAsm
-	MOVQ (DI)(R9*1), R8
-	MOVQ 8(DI)(R9*1), R10
-	XORQ (BX)(R9*1), R8
+	MOVQ (R8)(R10*1), R9
+	MOVQ 8(R8)(R10*1), R11
+	XORQ (SI)(R10*1), R9
 	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm
-	XORQ 8(BX)(R9*1), R10
+	XORQ 8(SI)(R10*1), R11
 	JNZ  matchlen_bsf_16match_nolit_encodeSnappyBlockAsm
-	LEAL -16(SI), SI
-	LEAL 16(R9), R9
+	LEAL -16(DI), DI
+	LEAL 16(R10), R10
 	JMP  matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm
 
 matchlen_bsf_16match_nolit_encodeSnappyBlockAsm:
 #ifdef GOAMD64_v3
-	TZCNTQ R10, R10
+	TZCNTQ R11, R11
 
 #else
-	BSFQ R10, R10
+	BSFQ R11, R11
 
 #endif
-	SARQ $0x03, R10
-	LEAL 8(R9)(R10*1), R9
+	SARQ $0x03, R11
+	LEAL 8(R10)(R11*1), R10
 	JMP  match_nolit_end_encodeSnappyBlockAsm
 
 matchlen_match8_match_nolit_encodeSnappyBlockAsm:
-	CMPL SI, $0x08
+	CMPL DI, $0x08
 	JB   matchlen_match4_match_nolit_encodeSnappyBlockAsm
-	MOVQ (DI)(R9*1), R8
-	XORQ (BX)(R9*1), R8
+	MOVQ (R8)(R10*1), R9
+	XORQ (SI)(R10*1), R9
 	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm
-	LEAL -8(SI), SI
-	LEAL 8(R9), R9
+	LEAL -8(DI), DI
+	LEAL 8(R10), R10
 	JMP  matchlen_match4_match_nolit_encodeSnappyBlockAsm
 
 matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm:
 #ifdef GOAMD64_v3
-	TZCNTQ R8, R8
+	TZCNTQ R9, R9
 
 #else
-	BSFQ R8, R8
+	BSFQ R9, R9
 
 #endif
-	SARQ $0x03, R8
-	LEAL (R9)(R8*1), R9
+	SARQ $0x03, R9
+	LEAL (R10)(R9*1), R10
 	JMP  match_nolit_end_encodeSnappyBlockAsm
 
 matchlen_match4_match_nolit_encodeSnappyBlockAsm:
-	CMPL SI, $0x04
+	CMPL DI, $0x04
 	JB   matchlen_match2_match_nolit_encodeSnappyBlockAsm
-	MOVL (DI)(R9*1), R8
-	CMPL (BX)(R9*1), R8
+	MOVL (R8)(R10*1), R9
+	CMPL (SI)(R10*1), R9
 	JNE  matchlen_match2_match_nolit_encodeSnappyBlockAsm
-	LEAL -4(SI), SI
-	LEAL 4(R9), R9
+	LEAL -4(DI), DI
+	LEAL 4(R10), R10
 
 matchlen_match2_match_nolit_encodeSnappyBlockAsm:
-	CMPL SI, $0x01
+	CMPL DI, $0x01
 	JE   matchlen_match1_match_nolit_encodeSnappyBlockAsm
 	JB   match_nolit_end_encodeSnappyBlockAsm
-	MOVW (DI)(R9*1), R8
-	CMPW (BX)(R9*1), R8
+	MOVW (R8)(R10*1), R9
+	CMPW (SI)(R10*1), R9
 	JNE  matchlen_match1_match_nolit_encodeSnappyBlockAsm
-	LEAL 2(R9), R9
-	SUBL $0x02, SI
+	LEAL 2(R10), R10
+	SUBL $0x02, DI
 	JZ   match_nolit_end_encodeSnappyBlockAsm
 
 matchlen_match1_match_nolit_encodeSnappyBlockAsm:
-	MOVB (DI)(R9*1), R8
-	CMPB (BX)(R9*1), R8
+	MOVB (R8)(R10*1), R9
+	CMPB (SI)(R10*1), R9
 	JNE  match_nolit_end_encodeSnappyBlockAsm
-	LEAL 1(R9), R9
+	LEAL 1(R10), R10
 
 match_nolit_end_encodeSnappyBlockAsm:
-	ADDL R9, CX
-	MOVL 16(SP), BX
-	ADDL $0x04, R9
-	MOVL CX, 12(SP)
+	ADDL R10, DX
+	MOVL 16(SP), SI
+	ADDL $0x04, R10
+	MOVL DX, 12(SP)
 
 	// emitCopy
-	CMPL BX, $0x00010000
+	CMPL SI, $0x00010000
 	JB   two_byte_offset_match_nolit_encodeSnappyBlockAsm
 
 four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm:
-	CMPL R9, $0x40
+	CMPL R10, $0x40
 	JBE  four_bytes_remain_match_nolit_encodeSnappyBlockAsm
-	MOVB $0xff, (AX)
-	MOVL BX, 1(AX)
-	LEAL -64(R9), R9
-	ADDQ $0x05, AX
-	CMPL R9, $0x04
+	MOVB $0xff, (CX)
+	MOVL SI, 1(CX)
+	LEAL -64(R10), R10
+	ADDQ $0x05, CX
+	CMPL R10, $0x04
 	JB   four_bytes_remain_match_nolit_encodeSnappyBlockAsm
 	JMP  four_bytes_loop_back_match_nolit_encodeSnappyBlockAsm
 
 four_bytes_remain_match_nolit_encodeSnappyBlockAsm:
-	TESTL R9, R9
+	TESTL R10, R10
 	JZ    match_nolit_emitcopy_end_encodeSnappyBlockAsm
-	XORL  SI, SI
-	LEAL  -1(SI)(R9*4), R9
-	MOVB  R9, (AX)
-	MOVL  BX, 1(AX)
-	ADDQ  $0x05, AX
+	XORL  DI, DI
+	LEAL  -1(DI)(R10*4), R10
+	MOVB  R10, (CX)
+	MOVL  SI, 1(CX)
+	ADDQ  $0x05, CX
 	JMP   match_nolit_emitcopy_end_encodeSnappyBlockAsm
 
 two_byte_offset_match_nolit_encodeSnappyBlockAsm:
-	CMPL R9, $0x40
+	CMPL R10, $0x40
 	JBE  two_byte_offset_short_match_nolit_encodeSnappyBlockAsm
-	MOVB $0xee, (AX)
-	MOVW BX, 1(AX)
-	LEAL -60(R9), R9
-	ADDQ $0x03, AX
+	MOVB $0xee, (CX)
+	MOVW SI, 1(CX)
+	LEAL -60(R10), R10
+	ADDQ $0x03, CX
 	JMP  two_byte_offset_match_nolit_encodeSnappyBlockAsm
 
 two_byte_offset_short_match_nolit_encodeSnappyBlockAsm:
-	MOVL R9, SI
-	SHLL $0x02, SI
-	CMPL R9, $0x0c
+	MOVL R10, DI
+	SHLL $0x02, DI
+	CMPL R10, $0x0c
 	JAE  emit_copy_three_match_nolit_encodeSnappyBlockAsm
-	CMPL BX, $0x00000800
+	CMPL SI, $0x00000800
 	JAE  emit_copy_three_match_nolit_encodeSnappyBlockAsm
-	LEAL -15(SI), SI
-	MOVB BL, 1(AX)
-	SHRL $0x08, BX
-	SHLL $0x05, BX
-	ORL  BX, SI
-	MOVB SI, (AX)
-	ADDQ $0x02, AX
+	LEAL -15(DI), DI
+	MOVB SI, 1(CX)
+	SHRL $0x08, SI
+	SHLL $0x05, SI
+	ORL  SI, DI
+	MOVB DI, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeSnappyBlockAsm
 
 emit_copy_three_match_nolit_encodeSnappyBlockAsm:
-	LEAL -2(SI), SI
-	MOVB SI, (AX)
-	MOVW BX, 1(AX)
-	ADDQ $0x03, AX
+	LEAL -2(DI), DI
+	MOVB DI, (CX)
+	MOVW SI, 1(CX)
+	ADDQ $0x03, CX
 
 match_nolit_emitcopy_end_encodeSnappyBlockAsm:
-	CMPL CX, 8(SP)
+	CMPL DX, 8(SP)
 	JAE  emit_remainder_encodeSnappyBlockAsm
-	MOVQ -2(DX)(CX*1), SI
-	CMPQ AX, (SP)
+	MOVQ -2(BX)(DX*1), DI
+	CMPQ CX, (SP)
 	JB   match_nolit_dst_ok_encodeSnappyBlockAsm
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 match_nolit_dst_ok_encodeSnappyBlockAsm:
-	MOVQ  $0x0000cf1bbcdcbf9b, R8
-	MOVQ  SI, DI
-	SHRQ  $0x10, SI
-	MOVQ  SI, BX
-	SHLQ  $0x10, DI
-	IMULQ R8, DI
-	SHRQ  $0x32, DI
-	SHLQ  $0x10, BX
-	IMULQ R8, BX
-	SHRQ  $0x32, BX
-	LEAL  -2(CX), R8
-	LEAQ  24(SP)(BX*4), R9
-	MOVL  (R9), BX
-	MOVL  R8, 24(SP)(DI*4)
-	MOVL  CX, (R9)
-	CMPL  (DX)(BX*1), SI
+	MOVQ  $0x0000cf1bbcdcbf9b, R9
+	MOVQ  DI, R8
+	SHRQ  $0x10, DI
+	MOVQ  DI, SI
+	SHLQ  $0x10, R8
+	IMULQ R9, R8
+	SHRQ  $0x32, R8
+	SHLQ  $0x10, SI
+	IMULQ R9, SI
+	SHRQ  $0x32, SI
+	LEAL  -2(DX), R9
+	LEAQ  (AX)(SI*4), R10
+	MOVL  (R10), SI
+	MOVL  R9, (AX)(R8*4)
+	MOVL  DX, (R10)
+	CMPL  (BX)(SI*1), DI
 	JEQ   match_nolit_loop_encodeSnappyBlockAsm
-	INCL  CX
+	INCL  DX
 	JMP   search_loop_encodeSnappyBlockAsm
 
 emit_remainder_encodeSnappyBlockAsm:
-	MOVQ src_len+32(FP), CX
-	SUBL 12(SP), CX
-	LEAQ 5(AX)(CX*1), CX
-	CMPQ CX, (SP)
+	MOVQ src_len+32(FP), AX
+	SUBL 12(SP), AX
+	LEAQ 5(CX)(AX*1), AX
+	CMPQ AX, (SP)
 	JB   emit_remainder_ok_encodeSnappyBlockAsm
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 emit_remainder_ok_encodeSnappyBlockAsm:
-	MOVQ src_len+32(FP), CX
-	MOVL 12(SP), BX
-	CMPL BX, CX
+	MOVQ src_len+32(FP), AX
+	MOVL 12(SP), DX
+	CMPL DX, AX
 	JEQ  emit_literal_done_emit_remainder_encodeSnappyBlockAsm
-	MOVL CX, SI
-	MOVL CX, 12(SP)
-	LEAQ (DX)(BX*1), CX
-	SUBL BX, SI
+	MOVL AX, SI
+	MOVL AX, 12(SP)
+	LEAQ (BX)(DX*1), AX
+	SUBL DX, SI
 	LEAL -1(SI), DX
 	CMPL DX, $0x3c
 	JB   one_byte_emit_remainder_encodeSnappyBlockAsm
@@ -11496,41 +11507,41 @@ emit_remainder_ok_encodeSnappyBlockAsm:
 	JB   three_bytes_emit_remainder_encodeSnappyBlockAsm
 	CMPL DX, $0x01000000
 	JB   four_bytes_emit_remainder_encodeSnappyBlockAsm
-	MOVB $0xfc, (AX)
-	MOVL DX, 1(AX)
-	ADDQ $0x05, AX
+	MOVB $0xfc, (CX)
+	MOVL DX, 1(CX)
+	ADDQ $0x05, CX
 	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm
 
 four_bytes_emit_remainder_encodeSnappyBlockAsm:
 	MOVL DX, BX
 	SHRL $0x10, BX
-	MOVB $0xf8, (AX)
-	MOVW DX, 1(AX)
-	MOVB BL, 3(AX)
-	ADDQ $0x04, AX
+	MOVB $0xf8, (CX)
+	MOVW DX, 1(CX)
+	MOVB BL, 3(CX)
+	ADDQ $0x04, CX
 	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm
 
 three_bytes_emit_remainder_encodeSnappyBlockAsm:
-	MOVB $0xf4, (AX)
-	MOVW DX, 1(AX)
-	ADDQ $0x03, AX
+	MOVB $0xf4, (CX)
+	MOVW DX, 1(CX)
+	ADDQ $0x03, CX
 	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm
 
 two_bytes_emit_remainder_encodeSnappyBlockAsm:
-	MOVB $0xf0, (AX)
-	MOVB DL, 1(AX)
-	ADDQ $0x02, AX
+	MOVB $0xf0, (CX)
+	MOVB DL, 1(CX)
+	ADDQ $0x02, CX
 	CMPL DX, $0x40
 	JB   memmove_emit_remainder_encodeSnappyBlockAsm
 	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm
 
 one_byte_emit_remainder_encodeSnappyBlockAsm:
 	SHLB $0x02, DL
-	MOVB DL, (AX)
-	ADDQ $0x01, AX
+	MOVB DL, (CX)
+	ADDQ $0x01, CX
 
 memmove_emit_remainder_encodeSnappyBlockAsm:
-	LEAQ (AX)(SI*1), DX
+	LEAQ (CX)(SI*1), DX
 	MOVL SI, BX
 
 	// genMemMoveShort
@@ -11546,73 +11557,73 @@ memmove_emit_remainder_encodeSnappyBlockAsm:
 	JMP  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_33through64
 
 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_1or2:
-	MOVB (CX), SI
-	MOVB -1(CX)(BX*1), CL
-	MOVB SI, (AX)
-	MOVB CL, -1(AX)(BX*1)
+	MOVB (AX), SI
+	MOVB -1(AX)(BX*1), AL
+	MOVB SI, (CX)
+	MOVB AL, -1(CX)(BX*1)
 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm
 
 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_3:
-	MOVW (CX), SI
-	MOVB 2(CX), CL
-	MOVW SI, (AX)
-	MOVB CL, 2(AX)
+	MOVW (AX), SI
+	MOVB 2(AX), AL
+	MOVW SI, (CX)
+	MOVB AL, 2(CX)
 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm
 
 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_4through7:
-	MOVL (CX), SI
-	MOVL -4(CX)(BX*1), CX
-	MOVL SI, (AX)
-	MOVL CX, -4(AX)(BX*1)
+	MOVL (AX), SI
+	MOVL -4(AX)(BX*1), AX
+	MOVL SI, (CX)
+	MOVL AX, -4(CX)(BX*1)
 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm
 
 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_8through16:
-	MOVQ (CX), SI
-	MOVQ -8(CX)(BX*1), CX
-	MOVQ SI, (AX)
-	MOVQ CX, -8(AX)(BX*1)
+	MOVQ (AX), SI
+	MOVQ -8(AX)(BX*1), AX
+	MOVQ SI, (CX)
+	MOVQ AX, -8(CX)(BX*1)
 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm
 
 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_17through32:
-	MOVOU (CX), X0
-	MOVOU -16(CX)(BX*1), X1
-	MOVOU X0, (AX)
-	MOVOU X1, -16(AX)(BX*1)
+	MOVOU (AX), X0
+	MOVOU -16(AX)(BX*1), X1
+	MOVOU X0, (CX)
+	MOVOU X1, -16(CX)(BX*1)
 	JMP   memmove_end_copy_emit_remainder_encodeSnappyBlockAsm
 
 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm_memmove_move_33through64:
-	MOVOU (CX), X0
-	MOVOU 16(CX), X1
-	MOVOU -32(CX)(BX*1), X2
-	MOVOU -16(CX)(BX*1), X3
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(BX*1)
-	MOVOU X3, -16(AX)(BX*1)
+	MOVOU (AX), X0
+	MOVOU 16(AX), X1
+	MOVOU -32(AX)(BX*1), X2
+	MOVOU -16(AX)(BX*1), X3
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(BX*1)
+	MOVOU X3, -16(CX)(BX*1)
 
 memmove_end_copy_emit_remainder_encodeSnappyBlockAsm:
-	MOVQ DX, AX
+	MOVQ DX, CX
 	JMP  emit_literal_done_emit_remainder_encodeSnappyBlockAsm
 
 memmove_long_emit_remainder_encodeSnappyBlockAsm:
-	LEAQ (AX)(SI*1), DX
+	LEAQ (CX)(SI*1), DX
 	MOVL SI, BX
 
 	// genMemMoveLong
-	MOVOU (CX), X0
-	MOVOU 16(CX), X1
-	MOVOU -32(CX)(BX*1), X2
-	MOVOU -16(CX)(BX*1), X3
+	MOVOU (AX), X0
+	MOVOU 16(AX), X1
+	MOVOU -32(AX)(BX*1), X2
+	MOVOU -16(AX)(BX*1), X3
 	MOVQ  BX, DI
 	SHRQ  $0x05, DI
-	MOVQ  AX, SI
+	MOVQ  CX, SI
 	ANDL  $0x0000001f, SI
 	MOVQ  $0x00000040, R8
 	SUBQ  SI, R8
 	DECQ  DI
 	JA    emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_forward_sse_loop_32
-	LEAQ  -32(CX)(R8*1), SI
-	LEAQ  -32(AX)(R8*1), R9
+	LEAQ  -32(AX)(R8*1), SI
+	LEAQ  -32(CX)(R8*1), R9
 
 emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_big_loop_back:
 	MOVOU (SI), X4
@@ -11626,718 +11637,719 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_big_loop_back:
 	JNA   emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_big_loop_back
 
 emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_forward_sse_loop_32:
-	MOVOU -32(CX)(R8*1), X4
-	MOVOU -16(CX)(R8*1), X5
-	MOVOA X4, -32(AX)(R8*1)
-	MOVOA X5, -16(AX)(R8*1)
+	MOVOU -32(AX)(R8*1), X4
+	MOVOU -16(AX)(R8*1), X5
+	MOVOA X4, -32(CX)(R8*1)
+	MOVOA X5, -16(CX)(R8*1)
 	ADDQ  $0x20, R8
 	CMPQ  BX, R8
 	JAE   emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsmlarge_forward_sse_loop_32
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(BX*1)
-	MOVOU X3, -16(AX)(BX*1)
-	MOVQ  DX, AX
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(BX*1)
+	MOVOU X3, -16(CX)(BX*1)
+	MOVQ  DX, CX
 
 emit_literal_done_emit_remainder_encodeSnappyBlockAsm:
-	MOVQ dst_base+0(FP), CX
-	SUBQ CX, AX
-	MOVQ AX, ret+48(FP)
+	MOVQ dst_base+0(FP), AX
+	SUBQ AX, CX
+	MOVQ CX, ret+56(FP)
 	RET
 
-// func encodeSnappyBlockAsm64K(dst []byte, src []byte) int
+// func encodeSnappyBlockAsm64K(dst []byte, src []byte, tmp *[65536]byte) int
 // Requires: BMI, SSE2
-TEXT ·encodeSnappyBlockAsm64K(SB), $65560-56
-	MOVQ dst_base+0(FP), AX
-	MOVQ $0x00000200, CX
-	LEAQ 24(SP), DX
+TEXT ·encodeSnappyBlockAsm64K(SB), $24-64
+	MOVQ tmp+48(FP), AX
+	MOVQ dst_base+0(FP), CX
+	MOVQ $0x00000200, DX
+	MOVQ AX, BX
 	PXOR X0, X0
 
 zero_loop_encodeSnappyBlockAsm64K:
-	MOVOU X0, (DX)
-	MOVOU X0, 16(DX)
-	MOVOU X0, 32(DX)
-	MOVOU X0, 48(DX)
-	MOVOU X0, 64(DX)
-	MOVOU X0, 80(DX)
-	MOVOU X0, 96(DX)
-	MOVOU X0, 112(DX)
-	ADDQ  $0x80, DX
-	DECQ  CX
+	MOVOU X0, (BX)
+	MOVOU X0, 16(BX)
+	MOVOU X0, 32(BX)
+	MOVOU X0, 48(BX)
+	MOVOU X0, 64(BX)
+	MOVOU X0, 80(BX)
+	MOVOU X0, 96(BX)
+	MOVOU X0, 112(BX)
+	ADDQ  $0x80, BX
+	DECQ  DX
 	JNZ   zero_loop_encodeSnappyBlockAsm64K
 	MOVL  $0x00000000, 12(SP)
-	MOVQ  src_len+32(FP), CX
-	LEAQ  -9(CX), DX
-	LEAQ  -8(CX), BX
-	MOVL  BX, 8(SP)
-	SHRQ  $0x05, CX
-	SUBL  CX, DX
-	LEAQ  (AX)(DX*1), DX
-	MOVQ  DX, (SP)
-	MOVL  $0x00000001, CX
-	MOVL  CX, 16(SP)
-	MOVQ  src_base+24(FP), DX
+	MOVQ  src_len+32(FP), DX
+	LEAQ  -9(DX), BX
+	LEAQ  -8(DX), SI
+	MOVL  SI, 8(SP)
+	SHRQ  $0x05, DX
+	SUBL  DX, BX
+	LEAQ  (CX)(BX*1), BX
+	MOVQ  BX, (SP)
+	MOVL  $0x00000001, DX
+	MOVL  DX, 16(SP)
+	MOVQ  src_base+24(FP), BX
 
 search_loop_encodeSnappyBlockAsm64K:
-	MOVL  CX, BX
-	SUBL  12(SP), BX
-	SHRL  $0x06, BX
-	LEAL  4(CX)(BX*1), BX
-	CMPL  BX, 8(SP)
+	MOVL  DX, SI
+	SUBL  12(SP), SI
+	SHRL  $0x06, SI
+	LEAL  4(DX)(SI*1), SI
+	CMPL  SI, 8(SP)
 	JAE   emit_remainder_encodeSnappyBlockAsm64K
-	MOVQ  (DX)(CX*1), SI
-	MOVL  BX, 20(SP)
-	MOVQ  $0x0000cf1bbcdcbf9b, R8
-	MOVQ  SI, R9
-	MOVQ  SI, R10
-	SHRQ  $0x08, R10
-	SHLQ  $0x10, R9
-	IMULQ R8, R9
-	SHRQ  $0x32, R9
+	MOVQ  (BX)(DX*1), DI
+	MOVL  SI, 20(SP)
+	MOVQ  $0x0000cf1bbcdcbf9b, R9
+	MOVQ  DI, R10
+	MOVQ  DI, R11
+	SHRQ  $0x08, R11
 	SHLQ  $0x10, R10
-	IMULQ R8, R10
+	IMULQ R9, R10
 	SHRQ  $0x32, R10
-	MOVL  24(SP)(R9*4), BX
-	MOVL  24(SP)(R10*4), DI
-	MOVL  CX, 24(SP)(R9*4)
-	LEAL  1(CX), R9
-	MOVL  R9, 24(SP)(R10*4)
-	MOVQ  SI, R9
-	SHRQ  $0x10, R9
-	SHLQ  $0x10, R9
-	IMULQ R8, R9
-	SHRQ  $0x32, R9
-	MOVL  CX, R8
-	SUBL  16(SP), R8
-	MOVL  1(DX)(R8*1), R10
-	MOVQ  SI, R8
-	SHRQ  $0x08, R8
-	CMPL  R8, R10
+	SHLQ  $0x10, R11
+	IMULQ R9, R11
+	SHRQ  $0x32, R11
+	MOVL  (AX)(R10*4), SI
+	MOVL  (AX)(R11*4), R8
+	MOVL  DX, (AX)(R10*4)
+	LEAL  1(DX), R10
+	MOVL  R10, (AX)(R11*4)
+	MOVQ  DI, R10
+	SHRQ  $0x10, R10
+	SHLQ  $0x10, R10
+	IMULQ R9, R10
+	SHRQ  $0x32, R10
+	MOVL  DX, R9
+	SUBL  16(SP), R9
+	MOVL  1(BX)(R9*1), R11
+	MOVQ  DI, R9
+	SHRQ  $0x08, R9
+	CMPL  R9, R11
 	JNE   no_repeat_found_encodeSnappyBlockAsm64K
-	LEAL  1(CX), SI
-	MOVL  12(SP), BX
-	MOVL  SI, DI
-	SUBL  16(SP), DI
+	LEAL  1(DX), DI
+	MOVL  12(SP), SI
+	MOVL  DI, R8
+	SUBL  16(SP), R8
 	JZ    repeat_extend_back_end_encodeSnappyBlockAsm64K
 
 repeat_extend_back_loop_encodeSnappyBlockAsm64K:
-	CMPL SI, BX
+	CMPL DI, SI
 	JBE  repeat_extend_back_end_encodeSnappyBlockAsm64K
-	MOVB -1(DX)(DI*1), R8
-	MOVB -1(DX)(SI*1), R9
-	CMPB R8, R9
+	MOVB -1(BX)(R8*1), R9
+	MOVB -1(BX)(DI*1), R10
+	CMPB R9, R10
 	JNE  repeat_extend_back_end_encodeSnappyBlockAsm64K
-	LEAL -1(SI), SI
-	DECL DI
+	LEAL -1(DI), DI
+	DECL R8
 	JNZ  repeat_extend_back_loop_encodeSnappyBlockAsm64K
 
 repeat_extend_back_end_encodeSnappyBlockAsm64K:
-	MOVL SI, BX
-	SUBL 12(SP), BX
-	LEAQ 3(AX)(BX*1), BX
-	CMPQ BX, (SP)
+	MOVL DI, SI
+	SUBL 12(SP), SI
+	LEAQ 3(CX)(SI*1), SI
+	CMPQ SI, (SP)
 	JB   repeat_dst_size_check_encodeSnappyBlockAsm64K
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 repeat_dst_size_check_encodeSnappyBlockAsm64K:
-	MOVL 12(SP), BX
-	CMPL BX, SI
+	MOVL 12(SP), SI
+	CMPL SI, DI
 	JEQ  emit_literal_done_repeat_emit_encodeSnappyBlockAsm64K
-	MOVL SI, DI
-	MOVL SI, 12(SP)
-	LEAQ (DX)(BX*1), R8
-	SUBL BX, DI
-	LEAL -1(DI), BX
-	CMPL BX, $0x3c
+	MOVL DI, R8
+	MOVL DI, 12(SP)
+	LEAQ (BX)(SI*1), R9
+	SUBL SI, R8
+	LEAL -1(R8), SI
+	CMPL SI, $0x3c
 	JB   one_byte_repeat_emit_encodeSnappyBlockAsm64K
-	CMPL BX, $0x00000100
+	CMPL SI, $0x00000100
 	JB   two_bytes_repeat_emit_encodeSnappyBlockAsm64K
 	JB   three_bytes_repeat_emit_encodeSnappyBlockAsm64K
 
 three_bytes_repeat_emit_encodeSnappyBlockAsm64K:
-	MOVB $0xf4, (AX)
-	MOVW BX, 1(AX)
-	ADDQ $0x03, AX
+	MOVB $0xf4, (CX)
+	MOVW SI, 1(CX)
+	ADDQ $0x03, CX
 	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm64K
 
 two_bytes_repeat_emit_encodeSnappyBlockAsm64K:
-	MOVB $0xf0, (AX)
-	MOVB BL, 1(AX)
-	ADDQ $0x02, AX
-	CMPL BX, $0x40
+	MOVB $0xf0, (CX)
+	MOVB SI, 1(CX)
+	ADDQ $0x02, CX
+	CMPL SI, $0x40
 	JB   memmove_repeat_emit_encodeSnappyBlockAsm64K
 	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm64K
 
 one_byte_repeat_emit_encodeSnappyBlockAsm64K:
-	SHLB $0x02, BL
-	MOVB BL, (AX)
-	ADDQ $0x01, AX
+	SHLB $0x02, SI
+	MOVB SI, (CX)
+	ADDQ $0x01, CX
 
 memmove_repeat_emit_encodeSnappyBlockAsm64K:
-	LEAQ (AX)(DI*1), BX
+	LEAQ (CX)(R8*1), SI
 
 	// genMemMoveShort
-	CMPQ DI, $0x08
+	CMPQ R8, $0x08
 	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8
-	CMPQ DI, $0x10
+	CMPQ R8, $0x10
 	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8through16
-	CMPQ DI, $0x20
+	CMPQ R8, $0x20
 	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_17through32
 	JMP  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_33through64
 
 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8:
-	MOVQ (R8), R9
-	MOVQ R9, (AX)
+	MOVQ (R9), R10
+	MOVQ R10, (CX)
 	JMP  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K
 
 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_8through16:
-	MOVQ (R8), R9
-	MOVQ -8(R8)(DI*1), R8
-	MOVQ R9, (AX)
-	MOVQ R8, -8(AX)(DI*1)
+	MOVQ (R9), R10
+	MOVQ -8(R9)(R8*1), R9
+	MOVQ R10, (CX)
+	MOVQ R9, -8(CX)(R8*1)
 	JMP  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K
 
 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_17through32:
-	MOVOU (R8), X0
-	MOVOU -16(R8)(DI*1), X1
-	MOVOU X0, (AX)
-	MOVOU X1, -16(AX)(DI*1)
+	MOVOU (R9), X0
+	MOVOU -16(R9)(R8*1), X1
+	MOVOU X0, (CX)
+	MOVOU X1, -16(CX)(R8*1)
 	JMP   memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K
 
 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm64K_memmove_move_33through64:
-	MOVOU (R8), X0
-	MOVOU 16(R8), X1
-	MOVOU -32(R8)(DI*1), X2
-	MOVOU -16(R8)(DI*1), X3
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(DI*1)
-	MOVOU X3, -16(AX)(DI*1)
+	MOVOU (R9), X0
+	MOVOU 16(R9), X1
+	MOVOU -32(R9)(R8*1), X2
+	MOVOU -16(R9)(R8*1), X3
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R8*1)
+	MOVOU X3, -16(CX)(R8*1)
 
 memmove_end_copy_repeat_emit_encodeSnappyBlockAsm64K:
-	MOVQ BX, AX
+	MOVQ SI, CX
 	JMP  emit_literal_done_repeat_emit_encodeSnappyBlockAsm64K
 
 memmove_long_repeat_emit_encodeSnappyBlockAsm64K:
-	LEAQ (AX)(DI*1), BX
+	LEAQ (CX)(R8*1), SI
 
 	// genMemMoveLong
-	MOVOU (R8), X0
-	MOVOU 16(R8), X1
-	MOVOU -32(R8)(DI*1), X2
-	MOVOU -16(R8)(DI*1), X3
-	MOVQ  DI, R10
-	SHRQ  $0x05, R10
-	MOVQ  AX, R9
-	ANDL  $0x0000001f, R9
-	MOVQ  $0x00000040, R11
-	SUBQ  R9, R11
-	DECQ  R10
+	MOVOU (R9), X0
+	MOVOU 16(R9), X1
+	MOVOU -32(R9)(R8*1), X2
+	MOVOU -16(R9)(R8*1), X3
+	MOVQ  R8, R11
+	SHRQ  $0x05, R11
+	MOVQ  CX, R10
+	ANDL  $0x0000001f, R10
+	MOVQ  $0x00000040, R12
+	SUBQ  R10, R12
+	DECQ  R11
 	JA    emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32
-	LEAQ  -32(R8)(R11*1), R9
-	LEAQ  -32(AX)(R11*1), R12
+	LEAQ  -32(R9)(R12*1), R10
+	LEAQ  -32(CX)(R12*1), R13
 
 emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_big_loop_back:
-	MOVOU (R9), X4
-	MOVOU 16(R9), X5
-	MOVOA X4, (R12)
-	MOVOA X5, 16(R12)
+	MOVOU (R10), X4
+	MOVOU 16(R10), X5
+	MOVOA X4, (R13)
+	MOVOA X5, 16(R13)
+	ADDQ  $0x20, R13
+	ADDQ  $0x20, R10
 	ADDQ  $0x20, R12
-	ADDQ  $0x20, R9
-	ADDQ  $0x20, R11
-	DECQ  R10
+	DECQ  R11
 	JNA   emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_big_loop_back
 
 emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32:
-	MOVOU -32(R8)(R11*1), X4
-	MOVOU -16(R8)(R11*1), X5
-	MOVOA X4, -32(AX)(R11*1)
-	MOVOA X5, -16(AX)(R11*1)
-	ADDQ  $0x20, R11
-	CMPQ  DI, R11
+	MOVOU -32(R9)(R12*1), X4
+	MOVOU -16(R9)(R12*1), X5
+	MOVOA X4, -32(CX)(R12*1)
+	MOVOA X5, -16(CX)(R12*1)
+	ADDQ  $0x20, R12
+	CMPQ  R8, R12
 	JAE   emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(DI*1)
-	MOVOU X3, -16(AX)(DI*1)
-	MOVQ  BX, AX
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R8*1)
+	MOVOU X3, -16(CX)(R8*1)
+	MOVQ  SI, CX
 
 emit_literal_done_repeat_emit_encodeSnappyBlockAsm64K:
-	ADDL $0x05, CX
-	MOVL CX, BX
-	SUBL 16(SP), BX
-	MOVQ src_len+32(FP), DI
-	SUBL CX, DI
-	LEAQ (DX)(CX*1), R8
-	LEAQ (DX)(BX*1), BX
+	ADDL $0x05, DX
+	MOVL DX, SI
+	SUBL 16(SP), SI
+	MOVQ src_len+32(FP), R8
+	SUBL DX, R8
+	LEAQ (BX)(DX*1), R9
+	LEAQ (BX)(SI*1), SI
 
 	// matchLen
-	XORL R10, R10
+	XORL R11, R11
 
 matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm64K:
-	CMPL DI, $0x10
+	CMPL R8, $0x10
 	JB   matchlen_match8_repeat_extend_encodeSnappyBlockAsm64K
-	MOVQ (R8)(R10*1), R9
-	MOVQ 8(R8)(R10*1), R11
-	XORQ (BX)(R10*1), R9
+	MOVQ (R9)(R11*1), R10
+	MOVQ 8(R9)(R11*1), R12
+	XORQ (SI)(R11*1), R10
 	JNZ  matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm64K
-	XORQ 8(BX)(R10*1), R11
+	XORQ 8(SI)(R11*1), R12
 	JNZ  matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm64K
-	LEAL -16(DI), DI
-	LEAL 16(R10), R10
+	LEAL -16(R8), R8
+	LEAL 16(R11), R11
 	JMP  matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm64K
 
 matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm64K:
 #ifdef GOAMD64_v3
-	TZCNTQ R11, R11
+	TZCNTQ R12, R12
 
 #else
-	BSFQ R11, R11
+	BSFQ R12, R12
 
 #endif
-	SARQ $0x03, R11
-	LEAL 8(R10)(R11*1), R10
+	SARQ $0x03, R12
+	LEAL 8(R11)(R12*1), R11
 	JMP  repeat_extend_forward_end_encodeSnappyBlockAsm64K
 
 matchlen_match8_repeat_extend_encodeSnappyBlockAsm64K:
-	CMPL DI, $0x08
+	CMPL R8, $0x08
 	JB   matchlen_match4_repeat_extend_encodeSnappyBlockAsm64K
-	MOVQ (R8)(R10*1), R9
-	XORQ (BX)(R10*1), R9
+	MOVQ (R9)(R11*1), R10
+	XORQ (SI)(R11*1), R10
 	JNZ  matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm64K
-	LEAL -8(DI), DI
-	LEAL 8(R10), R10
+	LEAL -8(R8), R8
+	LEAL 8(R11), R11
 	JMP  matchlen_match4_repeat_extend_encodeSnappyBlockAsm64K
 
 matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm64K:
 #ifdef GOAMD64_v3
-	TZCNTQ R9, R9
+	TZCNTQ R10, R10
 
 #else
-	BSFQ R9, R9
+	BSFQ R10, R10
 
 #endif
-	SARQ $0x03, R9
-	LEAL (R10)(R9*1), R10
+	SARQ $0x03, R10
+	LEAL (R11)(R10*1), R11
 	JMP  repeat_extend_forward_end_encodeSnappyBlockAsm64K
 
 matchlen_match4_repeat_extend_encodeSnappyBlockAsm64K:
-	CMPL DI, $0x04
+	CMPL R8, $0x04
 	JB   matchlen_match2_repeat_extend_encodeSnappyBlockAsm64K
-	MOVL (R8)(R10*1), R9
-	CMPL (BX)(R10*1), R9
+	MOVL (R9)(R11*1), R10
+	CMPL (SI)(R11*1), R10
 	JNE  matchlen_match2_repeat_extend_encodeSnappyBlockAsm64K
-	LEAL -4(DI), DI
-	LEAL 4(R10), R10
+	LEAL -4(R8), R8
+	LEAL 4(R11), R11
 
 matchlen_match2_repeat_extend_encodeSnappyBlockAsm64K:
-	CMPL DI, $0x01
+	CMPL R8, $0x01
 	JE   matchlen_match1_repeat_extend_encodeSnappyBlockAsm64K
 	JB   repeat_extend_forward_end_encodeSnappyBlockAsm64K
-	MOVW (R8)(R10*1), R9
-	CMPW (BX)(R10*1), R9
+	MOVW (R9)(R11*1), R10
+	CMPW (SI)(R11*1), R10
 	JNE  matchlen_match1_repeat_extend_encodeSnappyBlockAsm64K
-	LEAL 2(R10), R10
-	SUBL $0x02, DI
+	LEAL 2(R11), R11
+	SUBL $0x02, R8
 	JZ   repeat_extend_forward_end_encodeSnappyBlockAsm64K
 
 matchlen_match1_repeat_extend_encodeSnappyBlockAsm64K:
-	MOVB (R8)(R10*1), R9
-	CMPB (BX)(R10*1), R9
+	MOVB (R9)(R11*1), R10
+	CMPB (SI)(R11*1), R10
 	JNE  repeat_extend_forward_end_encodeSnappyBlockAsm64K
-	LEAL 1(R10), R10
+	LEAL 1(R11), R11
 
 repeat_extend_forward_end_encodeSnappyBlockAsm64K:
-	ADDL R10, CX
-	MOVL CX, BX
-	SUBL SI, BX
-	MOVL 16(SP), SI
+	ADDL R11, DX
+	MOVL DX, SI
+	SUBL DI, SI
+	MOVL 16(SP), DI
 
 	// emitCopy
 two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm64K:
-	CMPL BX, $0x40
+	CMPL SI, $0x40
 	JBE  two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm64K
-	MOVB $0xee, (AX)
-	MOVW SI, 1(AX)
-	LEAL -60(BX), BX
-	ADDQ $0x03, AX
+	MOVB $0xee, (CX)
+	MOVW DI, 1(CX)
+	LEAL -60(SI), SI
+	ADDQ $0x03, CX
 	JMP  two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm64K
 
 two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm64K:
-	MOVL BX, DI
-	SHLL $0x02, DI
-	CMPL BX, $0x0c
+	MOVL SI, R8
+	SHLL $0x02, R8
+	CMPL SI, $0x0c
 	JAE  emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm64K
-	CMPL SI, $0x00000800
+	CMPL DI, $0x00000800
 	JAE  emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm64K
-	LEAL -15(DI), DI
-	MOVB SI, 1(AX)
-	SHRL $0x08, SI
-	SHLL $0x05, SI
-	ORL  SI, DI
-	MOVB DI, (AX)
-	ADDQ $0x02, AX
+	LEAL -15(R8), R8
+	MOVB DI, 1(CX)
+	SHRL $0x08, DI
+	SHLL $0x05, DI
+	ORL  DI, R8
+	MOVB R8, (CX)
+	ADDQ $0x02, CX
 	JMP  repeat_end_emit_encodeSnappyBlockAsm64K
 
 emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm64K:
-	LEAL -2(DI), DI
-	MOVB DI, (AX)
-	MOVW SI, 1(AX)
-	ADDQ $0x03, AX
+	LEAL -2(R8), R8
+	MOVB R8, (CX)
+	MOVW DI, 1(CX)
+	ADDQ $0x03, CX
 
 repeat_end_emit_encodeSnappyBlockAsm64K:
-	MOVL CX, 12(SP)
+	MOVL DX, 12(SP)
 	JMP  search_loop_encodeSnappyBlockAsm64K
 
 no_repeat_found_encodeSnappyBlockAsm64K:
-	CMPL (DX)(BX*1), SI
+	CMPL (BX)(SI*1), DI
 	JEQ  candidate_match_encodeSnappyBlockAsm64K
-	SHRQ $0x08, SI
-	MOVL 24(SP)(R9*4), BX
-	LEAL 2(CX), R8
-	CMPL (DX)(DI*1), SI
+	SHRQ $0x08, DI
+	MOVL (AX)(R10*4), SI
+	LEAL 2(DX), R9
+	CMPL (BX)(R8*1), DI
 	JEQ  candidate2_match_encodeSnappyBlockAsm64K
-	MOVL R8, 24(SP)(R9*4)
-	SHRQ $0x08, SI
-	CMPL (DX)(BX*1), SI
+	MOVL R9, (AX)(R10*4)
+	SHRQ $0x08, DI
+	CMPL (BX)(SI*1), DI
 	JEQ  candidate3_match_encodeSnappyBlockAsm64K
-	MOVL 20(SP), CX
+	MOVL 20(SP), DX
 	JMP  search_loop_encodeSnappyBlockAsm64K
 
 candidate3_match_encodeSnappyBlockAsm64K:
-	ADDL $0x02, CX
+	ADDL $0x02, DX
 	JMP  candidate_match_encodeSnappyBlockAsm64K
 
 candidate2_match_encodeSnappyBlockAsm64K:
-	MOVL R8, 24(SP)(R9*4)
-	INCL CX
-	MOVL DI, BX
+	MOVL R9, (AX)(R10*4)
+	INCL DX
+	MOVL R8, SI
 
 candidate_match_encodeSnappyBlockAsm64K:
-	MOVL  12(SP), SI
-	TESTL BX, BX
+	MOVL  12(SP), DI
+	TESTL SI, SI
 	JZ    match_extend_back_end_encodeSnappyBlockAsm64K
 
 match_extend_back_loop_encodeSnappyBlockAsm64K:
-	CMPL CX, SI
+	CMPL DX, DI
 	JBE  match_extend_back_end_encodeSnappyBlockAsm64K
-	MOVB -1(DX)(BX*1), DI
-	MOVB -1(DX)(CX*1), R8
-	CMPB DI, R8
+	MOVB -1(BX)(SI*1), R8
+	MOVB -1(BX)(DX*1), R9
+	CMPB R8, R9
 	JNE  match_extend_back_end_encodeSnappyBlockAsm64K
-	LEAL -1(CX), CX
-	DECL BX
+	LEAL -1(DX), DX
+	DECL SI
 	JZ   match_extend_back_end_encodeSnappyBlockAsm64K
 	JMP  match_extend_back_loop_encodeSnappyBlockAsm64K
 
 match_extend_back_end_encodeSnappyBlockAsm64K:
-	MOVL CX, SI
-	SUBL 12(SP), SI
-	LEAQ 3(AX)(SI*1), SI
-	CMPQ SI, (SP)
+	MOVL DX, DI
+	SUBL 12(SP), DI
+	LEAQ 3(CX)(DI*1), DI
+	CMPQ DI, (SP)
 	JB   match_dst_size_check_encodeSnappyBlockAsm64K
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 match_dst_size_check_encodeSnappyBlockAsm64K:
-	MOVL CX, SI
-	MOVL 12(SP), DI
-	CMPL DI, SI
+	MOVL DX, DI
+	MOVL 12(SP), R8
+	CMPL R8, DI
 	JEQ  emit_literal_done_match_emit_encodeSnappyBlockAsm64K
-	MOVL SI, R8
-	MOVL SI, 12(SP)
-	LEAQ (DX)(DI*1), SI
-	SUBL DI, R8
-	LEAL -1(R8), DI
-	CMPL DI, $0x3c
+	MOVL DI, R9
+	MOVL DI, 12(SP)
+	LEAQ (BX)(R8*1), DI
+	SUBL R8, R9
+	LEAL -1(R9), R8
+	CMPL R8, $0x3c
 	JB   one_byte_match_emit_encodeSnappyBlockAsm64K
-	CMPL DI, $0x00000100
+	CMPL R8, $0x00000100
 	JB   two_bytes_match_emit_encodeSnappyBlockAsm64K
 	JB   three_bytes_match_emit_encodeSnappyBlockAsm64K
 
 three_bytes_match_emit_encodeSnappyBlockAsm64K:
-	MOVB $0xf4, (AX)
-	MOVW DI, 1(AX)
-	ADDQ $0x03, AX
+	MOVB $0xf4, (CX)
+	MOVW R8, 1(CX)
+	ADDQ $0x03, CX
 	JMP  memmove_long_match_emit_encodeSnappyBlockAsm64K
 
 two_bytes_match_emit_encodeSnappyBlockAsm64K:
-	MOVB $0xf0, (AX)
-	MOVB DI, 1(AX)
-	ADDQ $0x02, AX
-	CMPL DI, $0x40
+	MOVB $0xf0, (CX)
+	MOVB R8, 1(CX)
+	ADDQ $0x02, CX
+	CMPL R8, $0x40
 	JB   memmove_match_emit_encodeSnappyBlockAsm64K
 	JMP  memmove_long_match_emit_encodeSnappyBlockAsm64K
 
 one_byte_match_emit_encodeSnappyBlockAsm64K:
-	SHLB $0x02, DI
-	MOVB DI, (AX)
-	ADDQ $0x01, AX
+	SHLB $0x02, R8
+	MOVB R8, (CX)
+	ADDQ $0x01, CX
 
 memmove_match_emit_encodeSnappyBlockAsm64K:
-	LEAQ (AX)(R8*1), DI
+	LEAQ (CX)(R9*1), R8
 
 	// genMemMoveShort
-	CMPQ R8, $0x08
+	CMPQ R9, $0x08
 	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8
-	CMPQ R8, $0x10
+	CMPQ R9, $0x10
 	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8through16
-	CMPQ R8, $0x20
+	CMPQ R9, $0x20
 	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_17through32
 	JMP  emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_33through64
 
 emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8:
-	MOVQ (SI), R9
-	MOVQ R9, (AX)
+	MOVQ (DI), R10
+	MOVQ R10, (CX)
 	JMP  memmove_end_copy_match_emit_encodeSnappyBlockAsm64K
 
 emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_8through16:
-	MOVQ (SI), R9
-	MOVQ -8(SI)(R8*1), SI
-	MOVQ R9, (AX)
-	MOVQ SI, -8(AX)(R8*1)
+	MOVQ (DI), R10
+	MOVQ -8(DI)(R9*1), DI
+	MOVQ R10, (CX)
+	MOVQ DI, -8(CX)(R9*1)
 	JMP  memmove_end_copy_match_emit_encodeSnappyBlockAsm64K
 
 emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_17through32:
-	MOVOU (SI), X0
-	MOVOU -16(SI)(R8*1), X1
-	MOVOU X0, (AX)
-	MOVOU X1, -16(AX)(R8*1)
+	MOVOU (DI), X0
+	MOVOU -16(DI)(R9*1), X1
+	MOVOU X0, (CX)
+	MOVOU X1, -16(CX)(R9*1)
 	JMP   memmove_end_copy_match_emit_encodeSnappyBlockAsm64K
 
 emit_lit_memmove_match_emit_encodeSnappyBlockAsm64K_memmove_move_33through64:
-	MOVOU (SI), X0
-	MOVOU 16(SI), X1
-	MOVOU -32(SI)(R8*1), X2
-	MOVOU -16(SI)(R8*1), X3
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(R8*1)
-	MOVOU X3, -16(AX)(R8*1)
+	MOVOU (DI), X0
+	MOVOU 16(DI), X1
+	MOVOU -32(DI)(R9*1), X2
+	MOVOU -16(DI)(R9*1), X3
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R9*1)
+	MOVOU X3, -16(CX)(R9*1)
 
 memmove_end_copy_match_emit_encodeSnappyBlockAsm64K:
-	MOVQ DI, AX
+	MOVQ R8, CX
 	JMP  emit_literal_done_match_emit_encodeSnappyBlockAsm64K
 
 memmove_long_match_emit_encodeSnappyBlockAsm64K:
-	LEAQ (AX)(R8*1), DI
+	LEAQ (CX)(R9*1), R8
 
 	// genMemMoveLong
-	MOVOU (SI), X0
-	MOVOU 16(SI), X1
-	MOVOU -32(SI)(R8*1), X2
-	MOVOU -16(SI)(R8*1), X3
-	MOVQ  R8, R10
-	SHRQ  $0x05, R10
-	MOVQ  AX, R9
-	ANDL  $0x0000001f, R9
-	MOVQ  $0x00000040, R11
-	SUBQ  R9, R11
-	DECQ  R10
+	MOVOU (DI), X0
+	MOVOU 16(DI), X1
+	MOVOU -32(DI)(R9*1), X2
+	MOVOU -16(DI)(R9*1), X3
+	MOVQ  R9, R11
+	SHRQ  $0x05, R11
+	MOVQ  CX, R10
+	ANDL  $0x0000001f, R10
+	MOVQ  $0x00000040, R12
+	SUBQ  R10, R12
+	DECQ  R11
 	JA    emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32
-	LEAQ  -32(SI)(R11*1), R9
-	LEAQ  -32(AX)(R11*1), R12
+	LEAQ  -32(DI)(R12*1), R10
+	LEAQ  -32(CX)(R12*1), R13
 
 emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_big_loop_back:
-	MOVOU (R9), X4
-	MOVOU 16(R9), X5
-	MOVOA X4, (R12)
-	MOVOA X5, 16(R12)
+	MOVOU (R10), X4
+	MOVOU 16(R10), X5
+	MOVOA X4, (R13)
+	MOVOA X5, 16(R13)
+	ADDQ  $0x20, R13
+	ADDQ  $0x20, R10
 	ADDQ  $0x20, R12
-	ADDQ  $0x20, R9
-	ADDQ  $0x20, R11
-	DECQ  R10
+	DECQ  R11
 	JNA   emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_big_loop_back
 
 emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32:
-	MOVOU -32(SI)(R11*1), X4
-	MOVOU -16(SI)(R11*1), X5
-	MOVOA X4, -32(AX)(R11*1)
-	MOVOA X5, -16(AX)(R11*1)
-	ADDQ  $0x20, R11
-	CMPQ  R8, R11
+	MOVOU -32(DI)(R12*1), X4
+	MOVOU -16(DI)(R12*1), X5
+	MOVOA X4, -32(CX)(R12*1)
+	MOVOA X5, -16(CX)(R12*1)
+	ADDQ  $0x20, R12
+	CMPQ  R9, R12
 	JAE   emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(R8*1)
-	MOVOU X3, -16(AX)(R8*1)
-	MOVQ  DI, AX
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R9*1)
+	MOVOU X3, -16(CX)(R9*1)
+	MOVQ  R8, CX
 
 emit_literal_done_match_emit_encodeSnappyBlockAsm64K:
 match_nolit_loop_encodeSnappyBlockAsm64K:
-	MOVL CX, SI
-	SUBL BX, SI
-	MOVL SI, 16(SP)
-	ADDL $0x04, CX
-	ADDL $0x04, BX
-	MOVQ src_len+32(FP), SI
-	SUBL CX, SI
-	LEAQ (DX)(CX*1), DI
-	LEAQ (DX)(BX*1), BX
+	MOVL DX, DI
+	SUBL SI, DI
+	MOVL DI, 16(SP)
+	ADDL $0x04, DX
+	ADDL $0x04, SI
+	MOVQ src_len+32(FP), DI
+	SUBL DX, DI
+	LEAQ (BX)(DX*1), R8
+	LEAQ (BX)(SI*1), SI
 
 	// matchLen
-	XORL R9, R9
+	XORL R10, R10
 
 matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm64K:
-	CMPL SI, $0x10
+	CMPL DI, $0x10
 	JB   matchlen_match8_match_nolit_encodeSnappyBlockAsm64K
-	MOVQ (DI)(R9*1), R8
-	MOVQ 8(DI)(R9*1), R10
-	XORQ (BX)(R9*1), R8
+	MOVQ (R8)(R10*1), R9
+	MOVQ 8(R8)(R10*1), R11
+	XORQ (SI)(R10*1), R9
 	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm64K
-	XORQ 8(BX)(R9*1), R10
+	XORQ 8(SI)(R10*1), R11
 	JNZ  matchlen_bsf_16match_nolit_encodeSnappyBlockAsm64K
-	LEAL -16(SI), SI
-	LEAL 16(R9), R9
+	LEAL -16(DI), DI
+	LEAL 16(R10), R10
 	JMP  matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm64K
 
 matchlen_bsf_16match_nolit_encodeSnappyBlockAsm64K:
 #ifdef GOAMD64_v3
-	TZCNTQ R10, R10
+	TZCNTQ R11, R11
 
 #else
-	BSFQ R10, R10
+	BSFQ R11, R11
 
 #endif
-	SARQ $0x03, R10
-	LEAL 8(R9)(R10*1), R9
+	SARQ $0x03, R11
+	LEAL 8(R10)(R11*1), R10
 	JMP  match_nolit_end_encodeSnappyBlockAsm64K
 
 matchlen_match8_match_nolit_encodeSnappyBlockAsm64K:
-	CMPL SI, $0x08
+	CMPL DI, $0x08
 	JB   matchlen_match4_match_nolit_encodeSnappyBlockAsm64K
-	MOVQ (DI)(R9*1), R8
-	XORQ (BX)(R9*1), R8
+	MOVQ (R8)(R10*1), R9
+	XORQ (SI)(R10*1), R9
 	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm64K
-	LEAL -8(SI), SI
-	LEAL 8(R9), R9
+	LEAL -8(DI), DI
+	LEAL 8(R10), R10
 	JMP  matchlen_match4_match_nolit_encodeSnappyBlockAsm64K
 
 matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm64K:
 #ifdef GOAMD64_v3
-	TZCNTQ R8, R8
+	TZCNTQ R9, R9
 
 #else
-	BSFQ R8, R8
+	BSFQ R9, R9
 
 #endif
-	SARQ $0x03, R8
-	LEAL (R9)(R8*1), R9
+	SARQ $0x03, R9
+	LEAL (R10)(R9*1), R10
 	JMP  match_nolit_end_encodeSnappyBlockAsm64K
 
 matchlen_match4_match_nolit_encodeSnappyBlockAsm64K:
-	CMPL SI, $0x04
+	CMPL DI, $0x04
 	JB   matchlen_match2_match_nolit_encodeSnappyBlockAsm64K
-	MOVL (DI)(R9*1), R8
-	CMPL (BX)(R9*1), R8
+	MOVL (R8)(R10*1), R9
+	CMPL (SI)(R10*1), R9
 	JNE  matchlen_match2_match_nolit_encodeSnappyBlockAsm64K
-	LEAL -4(SI), SI
-	LEAL 4(R9), R9
+	LEAL -4(DI), DI
+	LEAL 4(R10), R10
 
 matchlen_match2_match_nolit_encodeSnappyBlockAsm64K:
-	CMPL SI, $0x01
+	CMPL DI, $0x01
 	JE   matchlen_match1_match_nolit_encodeSnappyBlockAsm64K
 	JB   match_nolit_end_encodeSnappyBlockAsm64K
-	MOVW (DI)(R9*1), R8
-	CMPW (BX)(R9*1), R8
+	MOVW (R8)(R10*1), R9
+	CMPW (SI)(R10*1), R9
 	JNE  matchlen_match1_match_nolit_encodeSnappyBlockAsm64K
-	LEAL 2(R9), R9
-	SUBL $0x02, SI
+	LEAL 2(R10), R10
+	SUBL $0x02, DI
 	JZ   match_nolit_end_encodeSnappyBlockAsm64K
 
 matchlen_match1_match_nolit_encodeSnappyBlockAsm64K:
-	MOVB (DI)(R9*1), R8
-	CMPB (BX)(R9*1), R8
+	MOVB (R8)(R10*1), R9
+	CMPB (SI)(R10*1), R9
 	JNE  match_nolit_end_encodeSnappyBlockAsm64K
-	LEAL 1(R9), R9
+	LEAL 1(R10), R10
 
 match_nolit_end_encodeSnappyBlockAsm64K:
-	ADDL R9, CX
-	MOVL 16(SP), BX
-	ADDL $0x04, R9
-	MOVL CX, 12(SP)
+	ADDL R10, DX
+	MOVL 16(SP), SI
+	ADDL $0x04, R10
+	MOVL DX, 12(SP)
 
 	// emitCopy
 two_byte_offset_match_nolit_encodeSnappyBlockAsm64K:
-	CMPL R9, $0x40
+	CMPL R10, $0x40
 	JBE  two_byte_offset_short_match_nolit_encodeSnappyBlockAsm64K
-	MOVB $0xee, (AX)
-	MOVW BX, 1(AX)
-	LEAL -60(R9), R9
-	ADDQ $0x03, AX
+	MOVB $0xee, (CX)
+	MOVW SI, 1(CX)
+	LEAL -60(R10), R10
+	ADDQ $0x03, CX
 	JMP  two_byte_offset_match_nolit_encodeSnappyBlockAsm64K
 
 two_byte_offset_short_match_nolit_encodeSnappyBlockAsm64K:
-	MOVL R9, SI
-	SHLL $0x02, SI
-	CMPL R9, $0x0c
+	MOVL R10, DI
+	SHLL $0x02, DI
+	CMPL R10, $0x0c
 	JAE  emit_copy_three_match_nolit_encodeSnappyBlockAsm64K
-	CMPL BX, $0x00000800
+	CMPL SI, $0x00000800
 	JAE  emit_copy_three_match_nolit_encodeSnappyBlockAsm64K
-	LEAL -15(SI), SI
-	MOVB BL, 1(AX)
-	SHRL $0x08, BX
-	SHLL $0x05, BX
-	ORL  BX, SI
-	MOVB SI, (AX)
-	ADDQ $0x02, AX
+	LEAL -15(DI), DI
+	MOVB SI, 1(CX)
+	SHRL $0x08, SI
+	SHLL $0x05, SI
+	ORL  SI, DI
+	MOVB DI, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeSnappyBlockAsm64K
 
 emit_copy_three_match_nolit_encodeSnappyBlockAsm64K:
-	LEAL -2(SI), SI
-	MOVB SI, (AX)
-	MOVW BX, 1(AX)
-	ADDQ $0x03, AX
+	LEAL -2(DI), DI
+	MOVB DI, (CX)
+	MOVW SI, 1(CX)
+	ADDQ $0x03, CX
 
 match_nolit_emitcopy_end_encodeSnappyBlockAsm64K:
-	CMPL CX, 8(SP)
+	CMPL DX, 8(SP)
 	JAE  emit_remainder_encodeSnappyBlockAsm64K
-	MOVQ -2(DX)(CX*1), SI
-	CMPQ AX, (SP)
+	MOVQ -2(BX)(DX*1), DI
+	CMPQ CX, (SP)
 	JB   match_nolit_dst_ok_encodeSnappyBlockAsm64K
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 match_nolit_dst_ok_encodeSnappyBlockAsm64K:
-	MOVQ  $0x0000cf1bbcdcbf9b, R8
-	MOVQ  SI, DI
-	SHRQ  $0x10, SI
-	MOVQ  SI, BX
-	SHLQ  $0x10, DI
-	IMULQ R8, DI
-	SHRQ  $0x32, DI
-	SHLQ  $0x10, BX
-	IMULQ R8, BX
-	SHRQ  $0x32, BX
-	LEAL  -2(CX), R8
-	LEAQ  24(SP)(BX*4), R9
-	MOVL  (R9), BX
-	MOVL  R8, 24(SP)(DI*4)
-	MOVL  CX, (R9)
-	CMPL  (DX)(BX*1), SI
+	MOVQ  $0x0000cf1bbcdcbf9b, R9
+	MOVQ  DI, R8
+	SHRQ  $0x10, DI
+	MOVQ  DI, SI
+	SHLQ  $0x10, R8
+	IMULQ R9, R8
+	SHRQ  $0x32, R8
+	SHLQ  $0x10, SI
+	IMULQ R9, SI
+	SHRQ  $0x32, SI
+	LEAL  -2(DX), R9
+	LEAQ  (AX)(SI*4), R10
+	MOVL  (R10), SI
+	MOVL  R9, (AX)(R8*4)
+	MOVL  DX, (R10)
+	CMPL  (BX)(SI*1), DI
 	JEQ   match_nolit_loop_encodeSnappyBlockAsm64K
-	INCL  CX
+	INCL  DX
 	JMP   search_loop_encodeSnappyBlockAsm64K
 
 emit_remainder_encodeSnappyBlockAsm64K:
-	MOVQ src_len+32(FP), CX
-	SUBL 12(SP), CX
-	LEAQ 3(AX)(CX*1), CX
-	CMPQ CX, (SP)
+	MOVQ src_len+32(FP), AX
+	SUBL 12(SP), AX
+	LEAQ 3(CX)(AX*1), AX
+	CMPQ AX, (SP)
 	JB   emit_remainder_ok_encodeSnappyBlockAsm64K
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 emit_remainder_ok_encodeSnappyBlockAsm64K:
-	MOVQ src_len+32(FP), CX
-	MOVL 12(SP), BX
-	CMPL BX, CX
+	MOVQ src_len+32(FP), AX
+	MOVL 12(SP), DX
+	CMPL DX, AX
 	JEQ  emit_literal_done_emit_remainder_encodeSnappyBlockAsm64K
-	MOVL CX, SI
-	MOVL CX, 12(SP)
-	LEAQ (DX)(BX*1), CX
-	SUBL BX, SI
+	MOVL AX, SI
+	MOVL AX, 12(SP)
+	LEAQ (BX)(DX*1), AX
+	SUBL DX, SI
 	LEAL -1(SI), DX
 	CMPL DX, $0x3c
 	JB   one_byte_emit_remainder_encodeSnappyBlockAsm64K
@@ -12346,26 +12358,26 @@ emit_remainder_ok_encodeSnappyBlockAsm64K:
 	JB   three_bytes_emit_remainder_encodeSnappyBlockAsm64K
 
 three_bytes_emit_remainder_encodeSnappyBlockAsm64K:
-	MOVB $0xf4, (AX)
-	MOVW DX, 1(AX)
-	ADDQ $0x03, AX
+	MOVB $0xf4, (CX)
+	MOVW DX, 1(CX)
+	ADDQ $0x03, CX
 	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm64K
 
 two_bytes_emit_remainder_encodeSnappyBlockAsm64K:
-	MOVB $0xf0, (AX)
-	MOVB DL, 1(AX)
-	ADDQ $0x02, AX
+	MOVB $0xf0, (CX)
+	MOVB DL, 1(CX)
+	ADDQ $0x02, CX
 	CMPL DX, $0x40
 	JB   memmove_emit_remainder_encodeSnappyBlockAsm64K
 	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm64K
 
 one_byte_emit_remainder_encodeSnappyBlockAsm64K:
 	SHLB $0x02, DL
-	MOVB DL, (AX)
-	ADDQ $0x01, AX
+	MOVB DL, (CX)
+	ADDQ $0x01, CX
 
 memmove_emit_remainder_encodeSnappyBlockAsm64K:
-	LEAQ (AX)(SI*1), DX
+	LEAQ (CX)(SI*1), DX
 	MOVL SI, BX
 
 	// genMemMoveShort
@@ -12381,73 +12393,73 @@ memmove_emit_remainder_encodeSnappyBlockAsm64K:
 	JMP  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_33through64
 
 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_1or2:
-	MOVB (CX), SI
-	MOVB -1(CX)(BX*1), CL
-	MOVB SI, (AX)
-	MOVB CL, -1(AX)(BX*1)
+	MOVB (AX), SI
+	MOVB -1(AX)(BX*1), AL
+	MOVB SI, (CX)
+	MOVB AL, -1(CX)(BX*1)
 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K
 
 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_3:
-	MOVW (CX), SI
-	MOVB 2(CX), CL
-	MOVW SI, (AX)
-	MOVB CL, 2(AX)
+	MOVW (AX), SI
+	MOVB 2(AX), AL
+	MOVW SI, (CX)
+	MOVB AL, 2(CX)
 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K
 
-emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_4through7:
-	MOVL (CX), SI
-	MOVL -4(CX)(BX*1), CX
-	MOVL SI, (AX)
-	MOVL CX, -4(AX)(BX*1)
+emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_4through7:
+	MOVL (AX), SI
+	MOVL -4(AX)(BX*1), AX
+	MOVL SI, (CX)
+	MOVL AX, -4(CX)(BX*1)
 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K
 
 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_8through16:
-	MOVQ (CX), SI
-	MOVQ -8(CX)(BX*1), CX
-	MOVQ SI, (AX)
-	MOVQ CX, -8(AX)(BX*1)
+	MOVQ (AX), SI
+	MOVQ -8(AX)(BX*1), AX
+	MOVQ SI, (CX)
+	MOVQ AX, -8(CX)(BX*1)
 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K
 
 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_17through32:
-	MOVOU (CX), X0
-	MOVOU -16(CX)(BX*1), X1
-	MOVOU X0, (AX)
-	MOVOU X1, -16(AX)(BX*1)
+	MOVOU (AX), X0
+	MOVOU -16(AX)(BX*1), X1
+	MOVOU X0, (CX)
+	MOVOU X1, -16(CX)(BX*1)
 	JMP   memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K
 
 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm64K_memmove_move_33through64:
-	MOVOU (CX), X0
-	MOVOU 16(CX), X1
-	MOVOU -32(CX)(BX*1), X2
-	MOVOU -16(CX)(BX*1), X3
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(BX*1)
-	MOVOU X3, -16(AX)(BX*1)
+	MOVOU (AX), X0
+	MOVOU 16(AX), X1
+	MOVOU -32(AX)(BX*1), X2
+	MOVOU -16(AX)(BX*1), X3
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(BX*1)
+	MOVOU X3, -16(CX)(BX*1)
 
 memmove_end_copy_emit_remainder_encodeSnappyBlockAsm64K:
-	MOVQ DX, AX
+	MOVQ DX, CX
 	JMP  emit_literal_done_emit_remainder_encodeSnappyBlockAsm64K
 
 memmove_long_emit_remainder_encodeSnappyBlockAsm64K:
-	LEAQ (AX)(SI*1), DX
+	LEAQ (CX)(SI*1), DX
 	MOVL SI, BX
 
 	// genMemMoveLong
-	MOVOU (CX), X0
-	MOVOU 16(CX), X1
-	MOVOU -32(CX)(BX*1), X2
-	MOVOU -16(CX)(BX*1), X3
+	MOVOU (AX), X0
+	MOVOU 16(AX), X1
+	MOVOU -32(AX)(BX*1), X2
+	MOVOU -16(AX)(BX*1), X3
 	MOVQ  BX, DI
 	SHRQ  $0x05, DI
-	MOVQ  AX, SI
+	MOVQ  CX, SI
 	ANDL  $0x0000001f, SI
 	MOVQ  $0x00000040, R8
 	SUBQ  SI, R8
 	DECQ  DI
 	JA    emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32
-	LEAQ  -32(CX)(R8*1), SI
-	LEAQ  -32(AX)(R8*1), R9
+	LEAQ  -32(AX)(R8*1), SI
+	LEAQ  -32(CX)(R8*1), R9
 
 emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_big_loop_back:
 	MOVOU (SI), X4
@@ -12461,718 +12473,719 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_big_loop_back:
 	JNA   emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_big_loop_back
 
 emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32:
-	MOVOU -32(CX)(R8*1), X4
-	MOVOU -16(CX)(R8*1), X5
-	MOVOA X4, -32(AX)(R8*1)
-	MOVOA X5, -16(AX)(R8*1)
+	MOVOU -32(AX)(R8*1), X4
+	MOVOU -16(AX)(R8*1), X5
+	MOVOA X4, -32(CX)(R8*1)
+	MOVOA X5, -16(CX)(R8*1)
 	ADDQ  $0x20, R8
 	CMPQ  BX, R8
 	JAE   emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm64Klarge_forward_sse_loop_32
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(BX*1)
-	MOVOU X3, -16(AX)(BX*1)
-	MOVQ  DX, AX
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(BX*1)
+	MOVOU X3, -16(CX)(BX*1)
+	MOVQ  DX, CX
 
 emit_literal_done_emit_remainder_encodeSnappyBlockAsm64K:
-	MOVQ dst_base+0(FP), CX
-	SUBQ CX, AX
-	MOVQ AX, ret+48(FP)
+	MOVQ dst_base+0(FP), AX
+	SUBQ AX, CX
+	MOVQ CX, ret+56(FP)
 	RET
 
-// func encodeSnappyBlockAsm12B(dst []byte, src []byte) int
+// func encodeSnappyBlockAsm12B(dst []byte, src []byte, tmp *[16384]byte) int
 // Requires: BMI, SSE2
-TEXT ·encodeSnappyBlockAsm12B(SB), $16408-56
-	MOVQ dst_base+0(FP), AX
-	MOVQ $0x00000080, CX
-	LEAQ 24(SP), DX
+TEXT ·encodeSnappyBlockAsm12B(SB), $24-64
+	MOVQ tmp+48(FP), AX
+	MOVQ dst_base+0(FP), CX
+	MOVQ $0x00000080, DX
+	MOVQ AX, BX
 	PXOR X0, X0
 
 zero_loop_encodeSnappyBlockAsm12B:
-	MOVOU X0, (DX)
-	MOVOU X0, 16(DX)
-	MOVOU X0, 32(DX)
-	MOVOU X0, 48(DX)
-	MOVOU X0, 64(DX)
-	MOVOU X0, 80(DX)
-	MOVOU X0, 96(DX)
-	MOVOU X0, 112(DX)
-	ADDQ  $0x80, DX
-	DECQ  CX
+	MOVOU X0, (BX)
+	MOVOU X0, 16(BX)
+	MOVOU X0, 32(BX)
+	MOVOU X0, 48(BX)
+	MOVOU X0, 64(BX)
+	MOVOU X0, 80(BX)
+	MOVOU X0, 96(BX)
+	MOVOU X0, 112(BX)
+	ADDQ  $0x80, BX
+	DECQ  DX
 	JNZ   zero_loop_encodeSnappyBlockAsm12B
 	MOVL  $0x00000000, 12(SP)
-	MOVQ  src_len+32(FP), CX
-	LEAQ  -9(CX), DX
-	LEAQ  -8(CX), BX
-	MOVL  BX, 8(SP)
-	SHRQ  $0x05, CX
-	SUBL  CX, DX
-	LEAQ  (AX)(DX*1), DX
-	MOVQ  DX, (SP)
-	MOVL  $0x00000001, CX
-	MOVL  CX, 16(SP)
-	MOVQ  src_base+24(FP), DX
+	MOVQ  src_len+32(FP), DX
+	LEAQ  -9(DX), BX
+	LEAQ  -8(DX), SI
+	MOVL  SI, 8(SP)
+	SHRQ  $0x05, DX
+	SUBL  DX, BX
+	LEAQ  (CX)(BX*1), BX
+	MOVQ  BX, (SP)
+	MOVL  $0x00000001, DX
+	MOVL  DX, 16(SP)
+	MOVQ  src_base+24(FP), BX
 
 search_loop_encodeSnappyBlockAsm12B:
-	MOVL  CX, BX
-	SUBL  12(SP), BX
-	SHRL  $0x05, BX
-	LEAL  4(CX)(BX*1), BX
-	CMPL  BX, 8(SP)
+	MOVL  DX, SI
+	SUBL  12(SP), SI
+	SHRL  $0x05, SI
+	LEAL  4(DX)(SI*1), SI
+	CMPL  SI, 8(SP)
 	JAE   emit_remainder_encodeSnappyBlockAsm12B
-	MOVQ  (DX)(CX*1), SI
-	MOVL  BX, 20(SP)
-	MOVQ  $0x000000cf1bbcdcbb, R8
-	MOVQ  SI, R9
-	MOVQ  SI, R10
-	SHRQ  $0x08, R10
-	SHLQ  $0x18, R9
-	IMULQ R8, R9
-	SHRQ  $0x34, R9
+	MOVQ  (BX)(DX*1), DI
+	MOVL  SI, 20(SP)
+	MOVQ  $0x000000cf1bbcdcbb, R9
+	MOVQ  DI, R10
+	MOVQ  DI, R11
+	SHRQ  $0x08, R11
 	SHLQ  $0x18, R10
-	IMULQ R8, R10
+	IMULQ R9, R10
 	SHRQ  $0x34, R10
-	MOVL  24(SP)(R9*4), BX
-	MOVL  24(SP)(R10*4), DI
-	MOVL  CX, 24(SP)(R9*4)
-	LEAL  1(CX), R9
-	MOVL  R9, 24(SP)(R10*4)
-	MOVQ  SI, R9
-	SHRQ  $0x10, R9
-	SHLQ  $0x18, R9
-	IMULQ R8, R9
-	SHRQ  $0x34, R9
-	MOVL  CX, R8
-	SUBL  16(SP), R8
-	MOVL  1(DX)(R8*1), R10
-	MOVQ  SI, R8
-	SHRQ  $0x08, R8
-	CMPL  R8, R10
+	SHLQ  $0x18, R11
+	IMULQ R9, R11
+	SHRQ  $0x34, R11
+	MOVL  (AX)(R10*4), SI
+	MOVL  (AX)(R11*4), R8
+	MOVL  DX, (AX)(R10*4)
+	LEAL  1(DX), R10
+	MOVL  R10, (AX)(R11*4)
+	MOVQ  DI, R10
+	SHRQ  $0x10, R10
+	SHLQ  $0x18, R10
+	IMULQ R9, R10
+	SHRQ  $0x34, R10
+	MOVL  DX, R9
+	SUBL  16(SP), R9
+	MOVL  1(BX)(R9*1), R11
+	MOVQ  DI, R9
+	SHRQ  $0x08, R9
+	CMPL  R9, R11
 	JNE   no_repeat_found_encodeSnappyBlockAsm12B
-	LEAL  1(CX), SI
-	MOVL  12(SP), BX
-	MOVL  SI, DI
-	SUBL  16(SP), DI
+	LEAL  1(DX), DI
+	MOVL  12(SP), SI
+	MOVL  DI, R8
+	SUBL  16(SP), R8
 	JZ    repeat_extend_back_end_encodeSnappyBlockAsm12B
 
 repeat_extend_back_loop_encodeSnappyBlockAsm12B:
-	CMPL SI, BX
+	CMPL DI, SI
 	JBE  repeat_extend_back_end_encodeSnappyBlockAsm12B
-	MOVB -1(DX)(DI*1), R8
-	MOVB -1(DX)(SI*1), R9
-	CMPB R8, R9
+	MOVB -1(BX)(R8*1), R9
+	MOVB -1(BX)(DI*1), R10
+	CMPB R9, R10
 	JNE  repeat_extend_back_end_encodeSnappyBlockAsm12B
-	LEAL -1(SI), SI
-	DECL DI
+	LEAL -1(DI), DI
+	DECL R8
 	JNZ  repeat_extend_back_loop_encodeSnappyBlockAsm12B
 
 repeat_extend_back_end_encodeSnappyBlockAsm12B:
-	MOVL SI, BX
-	SUBL 12(SP), BX
-	LEAQ 3(AX)(BX*1), BX
-	CMPQ BX, (SP)
+	MOVL DI, SI
+	SUBL 12(SP), SI
+	LEAQ 3(CX)(SI*1), SI
+	CMPQ SI, (SP)
 	JB   repeat_dst_size_check_encodeSnappyBlockAsm12B
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 repeat_dst_size_check_encodeSnappyBlockAsm12B:
-	MOVL 12(SP), BX
-	CMPL BX, SI
+	MOVL 12(SP), SI
+	CMPL SI, DI
 	JEQ  emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B
-	MOVL SI, DI
-	MOVL SI, 12(SP)
-	LEAQ (DX)(BX*1), R8
-	SUBL BX, DI
-	LEAL -1(DI), BX
-	CMPL BX, $0x3c
+	MOVL DI, R8
+	MOVL DI, 12(SP)
+	LEAQ (BX)(SI*1), R9
+	SUBL SI, R8
+	LEAL -1(R8), SI
+	CMPL SI, $0x3c
 	JB   one_byte_repeat_emit_encodeSnappyBlockAsm12B
-	CMPL BX, $0x00000100
+	CMPL SI, $0x00000100
 	JB   two_bytes_repeat_emit_encodeSnappyBlockAsm12B
 	JB   three_bytes_repeat_emit_encodeSnappyBlockAsm12B
 
 three_bytes_repeat_emit_encodeSnappyBlockAsm12B:
-	MOVB $0xf4, (AX)
-	MOVW BX, 1(AX)
-	ADDQ $0x03, AX
+	MOVB $0xf4, (CX)
+	MOVW SI, 1(CX)
+	ADDQ $0x03, CX
 	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm12B
 
 two_bytes_repeat_emit_encodeSnappyBlockAsm12B:
-	MOVB $0xf0, (AX)
-	MOVB BL, 1(AX)
-	ADDQ $0x02, AX
-	CMPL BX, $0x40
+	MOVB $0xf0, (CX)
+	MOVB SI, 1(CX)
+	ADDQ $0x02, CX
+	CMPL SI, $0x40
 	JB   memmove_repeat_emit_encodeSnappyBlockAsm12B
 	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm12B
 
 one_byte_repeat_emit_encodeSnappyBlockAsm12B:
-	SHLB $0x02, BL
-	MOVB BL, (AX)
-	ADDQ $0x01, AX
+	SHLB $0x02, SI
+	MOVB SI, (CX)
+	ADDQ $0x01, CX
 
 memmove_repeat_emit_encodeSnappyBlockAsm12B:
-	LEAQ (AX)(DI*1), BX
+	LEAQ (CX)(R8*1), SI
 
 	// genMemMoveShort
-	CMPQ DI, $0x08
+	CMPQ R8, $0x08
 	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8
-	CMPQ DI, $0x10
+	CMPQ R8, $0x10
 	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8through16
-	CMPQ DI, $0x20
+	CMPQ R8, $0x20
 	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_17through32
 	JMP  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_33through64
 
 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8:
-	MOVQ (R8), R9
-	MOVQ R9, (AX)
+	MOVQ (R9), R10
+	MOVQ R10, (CX)
 	JMP  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B
 
 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_8through16:
-	MOVQ (R8), R9
-	MOVQ -8(R8)(DI*1), R8
-	MOVQ R9, (AX)
-	MOVQ R8, -8(AX)(DI*1)
+	MOVQ (R9), R10
+	MOVQ -8(R9)(R8*1), R9
+	MOVQ R10, (CX)
+	MOVQ R9, -8(CX)(R8*1)
 	JMP  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B
 
 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_17through32:
-	MOVOU (R8), X0
-	MOVOU -16(R8)(DI*1), X1
-	MOVOU X0, (AX)
-	MOVOU X1, -16(AX)(DI*1)
+	MOVOU (R9), X0
+	MOVOU -16(R9)(R8*1), X1
+	MOVOU X0, (CX)
+	MOVOU X1, -16(CX)(R8*1)
 	JMP   memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B
 
 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm12B_memmove_move_33through64:
-	MOVOU (R8), X0
-	MOVOU 16(R8), X1
-	MOVOU -32(R8)(DI*1), X2
-	MOVOU -16(R8)(DI*1), X3
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(DI*1)
-	MOVOU X3, -16(AX)(DI*1)
+	MOVOU (R9), X0
+	MOVOU 16(R9), X1
+	MOVOU -32(R9)(R8*1), X2
+	MOVOU -16(R9)(R8*1), X3
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R8*1)
+	MOVOU X3, -16(CX)(R8*1)
 
 memmove_end_copy_repeat_emit_encodeSnappyBlockAsm12B:
-	MOVQ BX, AX
+	MOVQ SI, CX
 	JMP  emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B
 
 memmove_long_repeat_emit_encodeSnappyBlockAsm12B:
-	LEAQ (AX)(DI*1), BX
+	LEAQ (CX)(R8*1), SI
 
 	// genMemMoveLong
-	MOVOU (R8), X0
-	MOVOU 16(R8), X1
-	MOVOU -32(R8)(DI*1), X2
-	MOVOU -16(R8)(DI*1), X3
-	MOVQ  DI, R10
-	SHRQ  $0x05, R10
-	MOVQ  AX, R9
-	ANDL  $0x0000001f, R9
-	MOVQ  $0x00000040, R11
-	SUBQ  R9, R11
-	DECQ  R10
+	MOVOU (R9), X0
+	MOVOU 16(R9), X1
+	MOVOU -32(R9)(R8*1), X2
+	MOVOU -16(R9)(R8*1), X3
+	MOVQ  R8, R11
+	SHRQ  $0x05, R11
+	MOVQ  CX, R10
+	ANDL  $0x0000001f, R10
+	MOVQ  $0x00000040, R12
+	SUBQ  R10, R12
+	DECQ  R11
 	JA    emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
-	LEAQ  -32(R8)(R11*1), R9
-	LEAQ  -32(AX)(R11*1), R12
+	LEAQ  -32(R9)(R12*1), R10
+	LEAQ  -32(CX)(R12*1), R13
 
 emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_big_loop_back:
-	MOVOU (R9), X4
-	MOVOU 16(R9), X5
-	MOVOA X4, (R12)
-	MOVOA X5, 16(R12)
+	MOVOU (R10), X4
+	MOVOU 16(R10), X5
+	MOVOA X4, (R13)
+	MOVOA X5, 16(R13)
+	ADDQ  $0x20, R13
+	ADDQ  $0x20, R10
 	ADDQ  $0x20, R12
-	ADDQ  $0x20, R9
-	ADDQ  $0x20, R11
-	DECQ  R10
+	DECQ  R11
 	JNA   emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_big_loop_back
 
 emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32:
-	MOVOU -32(R8)(R11*1), X4
-	MOVOU -16(R8)(R11*1), X5
-	MOVOA X4, -32(AX)(R11*1)
-	MOVOA X5, -16(AX)(R11*1)
-	ADDQ  $0x20, R11
-	CMPQ  DI, R11
+	MOVOU -32(R9)(R12*1), X4
+	MOVOU -16(R9)(R12*1), X5
+	MOVOA X4, -32(CX)(R12*1)
+	MOVOA X5, -16(CX)(R12*1)
+	ADDQ  $0x20, R12
+	CMPQ  R8, R12
 	JAE   emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(DI*1)
-	MOVOU X3, -16(AX)(DI*1)
-	MOVQ  BX, AX
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R8*1)
+	MOVOU X3, -16(CX)(R8*1)
+	MOVQ  SI, CX
 
 emit_literal_done_repeat_emit_encodeSnappyBlockAsm12B:
-	ADDL $0x05, CX
-	MOVL CX, BX
-	SUBL 16(SP), BX
-	MOVQ src_len+32(FP), DI
-	SUBL CX, DI
-	LEAQ (DX)(CX*1), R8
-	LEAQ (DX)(BX*1), BX
+	ADDL $0x05, DX
+	MOVL DX, SI
+	SUBL 16(SP), SI
+	MOVQ src_len+32(FP), R8
+	SUBL DX, R8
+	LEAQ (BX)(DX*1), R9
+	LEAQ (BX)(SI*1), SI
 
 	// matchLen
-	XORL R10, R10
+	XORL R11, R11
 
 matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm12B:
-	CMPL DI, $0x10
+	CMPL R8, $0x10
 	JB   matchlen_match8_repeat_extend_encodeSnappyBlockAsm12B
-	MOVQ (R8)(R10*1), R9
-	MOVQ 8(R8)(R10*1), R11
-	XORQ (BX)(R10*1), R9
+	MOVQ (R9)(R11*1), R10
+	MOVQ 8(R9)(R11*1), R12
+	XORQ (SI)(R11*1), R10
 	JNZ  matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm12B
-	XORQ 8(BX)(R10*1), R11
+	XORQ 8(SI)(R11*1), R12
 	JNZ  matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm12B
-	LEAL -16(DI), DI
-	LEAL 16(R10), R10
+	LEAL -16(R8), R8
+	LEAL 16(R11), R11
 	JMP  matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm12B
 
 matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm12B:
 #ifdef GOAMD64_v3
-	TZCNTQ R11, R11
+	TZCNTQ R12, R12
 
 #else
-	BSFQ R11, R11
+	BSFQ R12, R12
 
 #endif
-	SARQ $0x03, R11
-	LEAL 8(R10)(R11*1), R10
+	SARQ $0x03, R12
+	LEAL 8(R11)(R12*1), R11
 	JMP  repeat_extend_forward_end_encodeSnappyBlockAsm12B
 
 matchlen_match8_repeat_extend_encodeSnappyBlockAsm12B:
-	CMPL DI, $0x08
+	CMPL R8, $0x08
 	JB   matchlen_match4_repeat_extend_encodeSnappyBlockAsm12B
-	MOVQ (R8)(R10*1), R9
-	XORQ (BX)(R10*1), R9
+	MOVQ (R9)(R11*1), R10
+	XORQ (SI)(R11*1), R10
 	JNZ  matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm12B
-	LEAL -8(DI), DI
-	LEAL 8(R10), R10
+	LEAL -8(R8), R8
+	LEAL 8(R11), R11
 	JMP  matchlen_match4_repeat_extend_encodeSnappyBlockAsm12B
 
 matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm12B:
 #ifdef GOAMD64_v3
-	TZCNTQ R9, R9
+	TZCNTQ R10, R10
 
 #else
-	BSFQ R9, R9
+	BSFQ R10, R10
 
 #endif
-	SARQ $0x03, R9
-	LEAL (R10)(R9*1), R10
+	SARQ $0x03, R10
+	LEAL (R11)(R10*1), R11
 	JMP  repeat_extend_forward_end_encodeSnappyBlockAsm12B
 
 matchlen_match4_repeat_extend_encodeSnappyBlockAsm12B:
-	CMPL DI, $0x04
+	CMPL R8, $0x04
 	JB   matchlen_match2_repeat_extend_encodeSnappyBlockAsm12B
-	MOVL (R8)(R10*1), R9
-	CMPL (BX)(R10*1), R9
+	MOVL (R9)(R11*1), R10
+	CMPL (SI)(R11*1), R10
 	JNE  matchlen_match2_repeat_extend_encodeSnappyBlockAsm12B
-	LEAL -4(DI), DI
-	LEAL 4(R10), R10
+	LEAL -4(R8), R8
+	LEAL 4(R11), R11
 
 matchlen_match2_repeat_extend_encodeSnappyBlockAsm12B:
-	CMPL DI, $0x01
+	CMPL R8, $0x01
 	JE   matchlen_match1_repeat_extend_encodeSnappyBlockAsm12B
 	JB   repeat_extend_forward_end_encodeSnappyBlockAsm12B
-	MOVW (R8)(R10*1), R9
-	CMPW (BX)(R10*1), R9
+	MOVW (R9)(R11*1), R10
+	CMPW (SI)(R11*1), R10
 	JNE  matchlen_match1_repeat_extend_encodeSnappyBlockAsm12B
-	LEAL 2(R10), R10
-	SUBL $0x02, DI
+	LEAL 2(R11), R11
+	SUBL $0x02, R8
 	JZ   repeat_extend_forward_end_encodeSnappyBlockAsm12B
 
 matchlen_match1_repeat_extend_encodeSnappyBlockAsm12B:
-	MOVB (R8)(R10*1), R9
-	CMPB (BX)(R10*1), R9
+	MOVB (R9)(R11*1), R10
+	CMPB (SI)(R11*1), R10
 	JNE  repeat_extend_forward_end_encodeSnappyBlockAsm12B
-	LEAL 1(R10), R10
+	LEAL 1(R11), R11
 
 repeat_extend_forward_end_encodeSnappyBlockAsm12B:
-	ADDL R10, CX
-	MOVL CX, BX
-	SUBL SI, BX
-	MOVL 16(SP), SI
+	ADDL R11, DX
+	MOVL DX, SI
+	SUBL DI, SI
+	MOVL 16(SP), DI
 
 	// emitCopy
 two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm12B:
-	CMPL BX, $0x40
+	CMPL SI, $0x40
 	JBE  two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm12B
-	MOVB $0xee, (AX)
-	MOVW SI, 1(AX)
-	LEAL -60(BX), BX
-	ADDQ $0x03, AX
+	MOVB $0xee, (CX)
+	MOVW DI, 1(CX)
+	LEAL -60(SI), SI
+	ADDQ $0x03, CX
 	JMP  two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm12B
 
 two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm12B:
-	MOVL BX, DI
-	SHLL $0x02, DI
-	CMPL BX, $0x0c
+	MOVL SI, R8
+	SHLL $0x02, R8
+	CMPL SI, $0x0c
 	JAE  emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B
-	CMPL SI, $0x00000800
+	CMPL DI, $0x00000800
 	JAE  emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B
-	LEAL -15(DI), DI
-	MOVB SI, 1(AX)
-	SHRL $0x08, SI
-	SHLL $0x05, SI
-	ORL  SI, DI
-	MOVB DI, (AX)
-	ADDQ $0x02, AX
+	LEAL -15(R8), R8
+	MOVB DI, 1(CX)
+	SHRL $0x08, DI
+	SHLL $0x05, DI
+	ORL  DI, R8
+	MOVB R8, (CX)
+	ADDQ $0x02, CX
 	JMP  repeat_end_emit_encodeSnappyBlockAsm12B
 
 emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm12B:
-	LEAL -2(DI), DI
-	MOVB DI, (AX)
-	MOVW SI, 1(AX)
-	ADDQ $0x03, AX
+	LEAL -2(R8), R8
+	MOVB R8, (CX)
+	MOVW DI, 1(CX)
+	ADDQ $0x03, CX
 
 repeat_end_emit_encodeSnappyBlockAsm12B:
-	MOVL CX, 12(SP)
+	MOVL DX, 12(SP)
 	JMP  search_loop_encodeSnappyBlockAsm12B
 
 no_repeat_found_encodeSnappyBlockAsm12B:
-	CMPL (DX)(BX*1), SI
+	CMPL (BX)(SI*1), DI
 	JEQ  candidate_match_encodeSnappyBlockAsm12B
-	SHRQ $0x08, SI
-	MOVL 24(SP)(R9*4), BX
-	LEAL 2(CX), R8
-	CMPL (DX)(DI*1), SI
+	SHRQ $0x08, DI
+	MOVL (AX)(R10*4), SI
+	LEAL 2(DX), R9
+	CMPL (BX)(R8*1), DI
 	JEQ  candidate2_match_encodeSnappyBlockAsm12B
-	MOVL R8, 24(SP)(R9*4)
-	SHRQ $0x08, SI
-	CMPL (DX)(BX*1), SI
+	MOVL R9, (AX)(R10*4)
+	SHRQ $0x08, DI
+	CMPL (BX)(SI*1), DI
 	JEQ  candidate3_match_encodeSnappyBlockAsm12B
-	MOVL 20(SP), CX
+	MOVL 20(SP), DX
 	JMP  search_loop_encodeSnappyBlockAsm12B
 
 candidate3_match_encodeSnappyBlockAsm12B:
-	ADDL $0x02, CX
+	ADDL $0x02, DX
 	JMP  candidate_match_encodeSnappyBlockAsm12B
 
 candidate2_match_encodeSnappyBlockAsm12B:
-	MOVL R8, 24(SP)(R9*4)
-	INCL CX
-	MOVL DI, BX
+	MOVL R9, (AX)(R10*4)
+	INCL DX
+	MOVL R8, SI
 
 candidate_match_encodeSnappyBlockAsm12B:
-	MOVL  12(SP), SI
-	TESTL BX, BX
+	MOVL  12(SP), DI
+	TESTL SI, SI
 	JZ    match_extend_back_end_encodeSnappyBlockAsm12B
 
 match_extend_back_loop_encodeSnappyBlockAsm12B:
-	CMPL CX, SI
+	CMPL DX, DI
 	JBE  match_extend_back_end_encodeSnappyBlockAsm12B
-	MOVB -1(DX)(BX*1), DI
-	MOVB -1(DX)(CX*1), R8
-	CMPB DI, R8
+	MOVB -1(BX)(SI*1), R8
+	MOVB -1(BX)(DX*1), R9
+	CMPB R8, R9
 	JNE  match_extend_back_end_encodeSnappyBlockAsm12B
-	LEAL -1(CX), CX
-	DECL BX
+	LEAL -1(DX), DX
+	DECL SI
 	JZ   match_extend_back_end_encodeSnappyBlockAsm12B
 	JMP  match_extend_back_loop_encodeSnappyBlockAsm12B
 
 match_extend_back_end_encodeSnappyBlockAsm12B:
-	MOVL CX, SI
-	SUBL 12(SP), SI
-	LEAQ 3(AX)(SI*1), SI
-	CMPQ SI, (SP)
+	MOVL DX, DI
+	SUBL 12(SP), DI
+	LEAQ 3(CX)(DI*1), DI
+	CMPQ DI, (SP)
 	JB   match_dst_size_check_encodeSnappyBlockAsm12B
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 match_dst_size_check_encodeSnappyBlockAsm12B:
-	MOVL CX, SI
-	MOVL 12(SP), DI
-	CMPL DI, SI
+	MOVL DX, DI
+	MOVL 12(SP), R8
+	CMPL R8, DI
 	JEQ  emit_literal_done_match_emit_encodeSnappyBlockAsm12B
-	MOVL SI, R8
-	MOVL SI, 12(SP)
-	LEAQ (DX)(DI*1), SI
-	SUBL DI, R8
-	LEAL -1(R8), DI
-	CMPL DI, $0x3c
+	MOVL DI, R9
+	MOVL DI, 12(SP)
+	LEAQ (BX)(R8*1), DI
+	SUBL R8, R9
+	LEAL -1(R9), R8
+	CMPL R8, $0x3c
 	JB   one_byte_match_emit_encodeSnappyBlockAsm12B
-	CMPL DI, $0x00000100
+	CMPL R8, $0x00000100
 	JB   two_bytes_match_emit_encodeSnappyBlockAsm12B
 	JB   three_bytes_match_emit_encodeSnappyBlockAsm12B
 
 three_bytes_match_emit_encodeSnappyBlockAsm12B:
-	MOVB $0xf4, (AX)
-	MOVW DI, 1(AX)
-	ADDQ $0x03, AX
+	MOVB $0xf4, (CX)
+	MOVW R8, 1(CX)
+	ADDQ $0x03, CX
 	JMP  memmove_long_match_emit_encodeSnappyBlockAsm12B
 
 two_bytes_match_emit_encodeSnappyBlockAsm12B:
-	MOVB $0xf0, (AX)
-	MOVB DI, 1(AX)
-	ADDQ $0x02, AX
-	CMPL DI, $0x40
+	MOVB $0xf0, (CX)
+	MOVB R8, 1(CX)
+	ADDQ $0x02, CX
+	CMPL R8, $0x40
 	JB   memmove_match_emit_encodeSnappyBlockAsm12B
 	JMP  memmove_long_match_emit_encodeSnappyBlockAsm12B
 
 one_byte_match_emit_encodeSnappyBlockAsm12B:
-	SHLB $0x02, DI
-	MOVB DI, (AX)
-	ADDQ $0x01, AX
+	SHLB $0x02, R8
+	MOVB R8, (CX)
+	ADDQ $0x01, CX
 
 memmove_match_emit_encodeSnappyBlockAsm12B:
-	LEAQ (AX)(R8*1), DI
+	LEAQ (CX)(R9*1), R8
 
 	// genMemMoveShort
-	CMPQ R8, $0x08
+	CMPQ R9, $0x08
 	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8
-	CMPQ R8, $0x10
+	CMPQ R9, $0x10
 	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8through16
-	CMPQ R8, $0x20
+	CMPQ R9, $0x20
 	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_17through32
 	JMP  emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_33through64
 
 emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8:
-	MOVQ (SI), R9
-	MOVQ R9, (AX)
+	MOVQ (DI), R10
+	MOVQ R10, (CX)
 	JMP  memmove_end_copy_match_emit_encodeSnappyBlockAsm12B
 
 emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_8through16:
-	MOVQ (SI), R9
-	MOVQ -8(SI)(R8*1), SI
-	MOVQ R9, (AX)
-	MOVQ SI, -8(AX)(R8*1)
+	MOVQ (DI), R10
+	MOVQ -8(DI)(R9*1), DI
+	MOVQ R10, (CX)
+	MOVQ DI, -8(CX)(R9*1)
 	JMP  memmove_end_copy_match_emit_encodeSnappyBlockAsm12B
 
 emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_17through32:
-	MOVOU (SI), X0
-	MOVOU -16(SI)(R8*1), X1
-	MOVOU X0, (AX)
-	MOVOU X1, -16(AX)(R8*1)
+	MOVOU (DI), X0
+	MOVOU -16(DI)(R9*1), X1
+	MOVOU X0, (CX)
+	MOVOU X1, -16(CX)(R9*1)
 	JMP   memmove_end_copy_match_emit_encodeSnappyBlockAsm12B
 
 emit_lit_memmove_match_emit_encodeSnappyBlockAsm12B_memmove_move_33through64:
-	MOVOU (SI), X0
-	MOVOU 16(SI), X1
-	MOVOU -32(SI)(R8*1), X2
-	MOVOU -16(SI)(R8*1), X3
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(R8*1)
-	MOVOU X3, -16(AX)(R8*1)
+	MOVOU (DI), X0
+	MOVOU 16(DI), X1
+	MOVOU -32(DI)(R9*1), X2
+	MOVOU -16(DI)(R9*1), X3
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R9*1)
+	MOVOU X3, -16(CX)(R9*1)
 
 memmove_end_copy_match_emit_encodeSnappyBlockAsm12B:
-	MOVQ DI, AX
+	MOVQ R8, CX
 	JMP  emit_literal_done_match_emit_encodeSnappyBlockAsm12B
 
 memmove_long_match_emit_encodeSnappyBlockAsm12B:
-	LEAQ (AX)(R8*1), DI
+	LEAQ (CX)(R9*1), R8
 
 	// genMemMoveLong
-	MOVOU (SI), X0
-	MOVOU 16(SI), X1
-	MOVOU -32(SI)(R8*1), X2
-	MOVOU -16(SI)(R8*1), X3
-	MOVQ  R8, R10
-	SHRQ  $0x05, R10
-	MOVQ  AX, R9
-	ANDL  $0x0000001f, R9
-	MOVQ  $0x00000040, R11
-	SUBQ  R9, R11
-	DECQ  R10
+	MOVOU (DI), X0
+	MOVOU 16(DI), X1
+	MOVOU -32(DI)(R9*1), X2
+	MOVOU -16(DI)(R9*1), X3
+	MOVQ  R9, R11
+	SHRQ  $0x05, R11
+	MOVQ  CX, R10
+	ANDL  $0x0000001f, R10
+	MOVQ  $0x00000040, R12
+	SUBQ  R10, R12
+	DECQ  R11
 	JA    emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
-	LEAQ  -32(SI)(R11*1), R9
-	LEAQ  -32(AX)(R11*1), R12
+	LEAQ  -32(DI)(R12*1), R10
+	LEAQ  -32(CX)(R12*1), R13
 
 emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_big_loop_back:
-	MOVOU (R9), X4
-	MOVOU 16(R9), X5
-	MOVOA X4, (R12)
-	MOVOA X5, 16(R12)
+	MOVOU (R10), X4
+	MOVOU 16(R10), X5
+	MOVOA X4, (R13)
+	MOVOA X5, 16(R13)
+	ADDQ  $0x20, R13
+	ADDQ  $0x20, R10
 	ADDQ  $0x20, R12
-	ADDQ  $0x20, R9
-	ADDQ  $0x20, R11
-	DECQ  R10
+	DECQ  R11
 	JNA   emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_big_loop_back
 
 emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32:
-	MOVOU -32(SI)(R11*1), X4
-	MOVOU -16(SI)(R11*1), X5
-	MOVOA X4, -32(AX)(R11*1)
-	MOVOA X5, -16(AX)(R11*1)
-	ADDQ  $0x20, R11
-	CMPQ  R8, R11
+	MOVOU -32(DI)(R12*1), X4
+	MOVOU -16(DI)(R12*1), X5
+	MOVOA X4, -32(CX)(R12*1)
+	MOVOA X5, -16(CX)(R12*1)
+	ADDQ  $0x20, R12
+	CMPQ  R9, R12
 	JAE   emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(R8*1)
-	MOVOU X3, -16(AX)(R8*1)
-	MOVQ  DI, AX
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R9*1)
+	MOVOU X3, -16(CX)(R9*1)
+	MOVQ  R8, CX
 
 emit_literal_done_match_emit_encodeSnappyBlockAsm12B:
 match_nolit_loop_encodeSnappyBlockAsm12B:
-	MOVL CX, SI
-	SUBL BX, SI
-	MOVL SI, 16(SP)
-	ADDL $0x04, CX
-	ADDL $0x04, BX
-	MOVQ src_len+32(FP), SI
-	SUBL CX, SI
-	LEAQ (DX)(CX*1), DI
-	LEAQ (DX)(BX*1), BX
+	MOVL DX, DI
+	SUBL SI, DI
+	MOVL DI, 16(SP)
+	ADDL $0x04, DX
+	ADDL $0x04, SI
+	MOVQ src_len+32(FP), DI
+	SUBL DX, DI
+	LEAQ (BX)(DX*1), R8
+	LEAQ (BX)(SI*1), SI
 
 	// matchLen
-	XORL R9, R9
+	XORL R10, R10
 
 matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm12B:
-	CMPL SI, $0x10
+	CMPL DI, $0x10
 	JB   matchlen_match8_match_nolit_encodeSnappyBlockAsm12B
-	MOVQ (DI)(R9*1), R8
-	MOVQ 8(DI)(R9*1), R10
-	XORQ (BX)(R9*1), R8
+	MOVQ (R8)(R10*1), R9
+	MOVQ 8(R8)(R10*1), R11
+	XORQ (SI)(R10*1), R9
 	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm12B
-	XORQ 8(BX)(R9*1), R10
+	XORQ 8(SI)(R10*1), R11
 	JNZ  matchlen_bsf_16match_nolit_encodeSnappyBlockAsm12B
-	LEAL -16(SI), SI
-	LEAL 16(R9), R9
+	LEAL -16(DI), DI
+	LEAL 16(R10), R10
 	JMP  matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm12B
 
 matchlen_bsf_16match_nolit_encodeSnappyBlockAsm12B:
 #ifdef GOAMD64_v3
-	TZCNTQ R10, R10
+	TZCNTQ R11, R11
 
 #else
-	BSFQ R10, R10
+	BSFQ R11, R11
 
 #endif
-	SARQ $0x03, R10
-	LEAL 8(R9)(R10*1), R9
+	SARQ $0x03, R11
+	LEAL 8(R10)(R11*1), R10
 	JMP  match_nolit_end_encodeSnappyBlockAsm12B
 
 matchlen_match8_match_nolit_encodeSnappyBlockAsm12B:
-	CMPL SI, $0x08
+	CMPL DI, $0x08
 	JB   matchlen_match4_match_nolit_encodeSnappyBlockAsm12B
-	MOVQ (DI)(R9*1), R8
-	XORQ (BX)(R9*1), R8
+	MOVQ (R8)(R10*1), R9
+	XORQ (SI)(R10*1), R9
 	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm12B
-	LEAL -8(SI), SI
-	LEAL 8(R9), R9
+	LEAL -8(DI), DI
+	LEAL 8(R10), R10
 	JMP  matchlen_match4_match_nolit_encodeSnappyBlockAsm12B
 
 matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm12B:
 #ifdef GOAMD64_v3
-	TZCNTQ R8, R8
+	TZCNTQ R9, R9
 
 #else
-	BSFQ R8, R8
+	BSFQ R9, R9
 
 #endif
-	SARQ $0x03, R8
-	LEAL (R9)(R8*1), R9
+	SARQ $0x03, R9
+	LEAL (R10)(R9*1), R10
 	JMP  match_nolit_end_encodeSnappyBlockAsm12B
 
 matchlen_match4_match_nolit_encodeSnappyBlockAsm12B:
-	CMPL SI, $0x04
+	CMPL DI, $0x04
 	JB   matchlen_match2_match_nolit_encodeSnappyBlockAsm12B
-	MOVL (DI)(R9*1), R8
-	CMPL (BX)(R9*1), R8
+	MOVL (R8)(R10*1), R9
+	CMPL (SI)(R10*1), R9
 	JNE  matchlen_match2_match_nolit_encodeSnappyBlockAsm12B
-	LEAL -4(SI), SI
-	LEAL 4(R9), R9
+	LEAL -4(DI), DI
+	LEAL 4(R10), R10
 
 matchlen_match2_match_nolit_encodeSnappyBlockAsm12B:
-	CMPL SI, $0x01
+	CMPL DI, $0x01
 	JE   matchlen_match1_match_nolit_encodeSnappyBlockAsm12B
 	JB   match_nolit_end_encodeSnappyBlockAsm12B
-	MOVW (DI)(R9*1), R8
-	CMPW (BX)(R9*1), R8
+	MOVW (R8)(R10*1), R9
+	CMPW (SI)(R10*1), R9
 	JNE  matchlen_match1_match_nolit_encodeSnappyBlockAsm12B
-	LEAL 2(R9), R9
-	SUBL $0x02, SI
+	LEAL 2(R10), R10
+	SUBL $0x02, DI
 	JZ   match_nolit_end_encodeSnappyBlockAsm12B
 
 matchlen_match1_match_nolit_encodeSnappyBlockAsm12B:
-	MOVB (DI)(R9*1), R8
-	CMPB (BX)(R9*1), R8
+	MOVB (R8)(R10*1), R9
+	CMPB (SI)(R10*1), R9
 	JNE  match_nolit_end_encodeSnappyBlockAsm12B
-	LEAL 1(R9), R9
+	LEAL 1(R10), R10
 
 match_nolit_end_encodeSnappyBlockAsm12B:
-	ADDL R9, CX
-	MOVL 16(SP), BX
-	ADDL $0x04, R9
-	MOVL CX, 12(SP)
+	ADDL R10, DX
+	MOVL 16(SP), SI
+	ADDL $0x04, R10
+	MOVL DX, 12(SP)
 
 	// emitCopy
 two_byte_offset_match_nolit_encodeSnappyBlockAsm12B:
-	CMPL R9, $0x40
+	CMPL R10, $0x40
 	JBE  two_byte_offset_short_match_nolit_encodeSnappyBlockAsm12B
-	MOVB $0xee, (AX)
-	MOVW BX, 1(AX)
-	LEAL -60(R9), R9
-	ADDQ $0x03, AX
+	MOVB $0xee, (CX)
+	MOVW SI, 1(CX)
+	LEAL -60(R10), R10
+	ADDQ $0x03, CX
 	JMP  two_byte_offset_match_nolit_encodeSnappyBlockAsm12B
 
 two_byte_offset_short_match_nolit_encodeSnappyBlockAsm12B:
-	MOVL R9, SI
-	SHLL $0x02, SI
-	CMPL R9, $0x0c
+	MOVL R10, DI
+	SHLL $0x02, DI
+	CMPL R10, $0x0c
 	JAE  emit_copy_three_match_nolit_encodeSnappyBlockAsm12B
-	CMPL BX, $0x00000800
+	CMPL SI, $0x00000800
 	JAE  emit_copy_three_match_nolit_encodeSnappyBlockAsm12B
-	LEAL -15(SI), SI
-	MOVB BL, 1(AX)
-	SHRL $0x08, BX
-	SHLL $0x05, BX
-	ORL  BX, SI
-	MOVB SI, (AX)
-	ADDQ $0x02, AX
+	LEAL -15(DI), DI
+	MOVB SI, 1(CX)
+	SHRL $0x08, SI
+	SHLL $0x05, SI
+	ORL  SI, DI
+	MOVB DI, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeSnappyBlockAsm12B
 
 emit_copy_three_match_nolit_encodeSnappyBlockAsm12B:
-	LEAL -2(SI), SI
-	MOVB SI, (AX)
-	MOVW BX, 1(AX)
-	ADDQ $0x03, AX
+	LEAL -2(DI), DI
+	MOVB DI, (CX)
+	MOVW SI, 1(CX)
+	ADDQ $0x03, CX
 
 match_nolit_emitcopy_end_encodeSnappyBlockAsm12B:
-	CMPL CX, 8(SP)
+	CMPL DX, 8(SP)
 	JAE  emit_remainder_encodeSnappyBlockAsm12B
-	MOVQ -2(DX)(CX*1), SI
-	CMPQ AX, (SP)
+	MOVQ -2(BX)(DX*1), DI
+	CMPQ CX, (SP)
 	JB   match_nolit_dst_ok_encodeSnappyBlockAsm12B
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 match_nolit_dst_ok_encodeSnappyBlockAsm12B:
-	MOVQ  $0x000000cf1bbcdcbb, R8
-	MOVQ  SI, DI
-	SHRQ  $0x10, SI
-	MOVQ  SI, BX
-	SHLQ  $0x18, DI
-	IMULQ R8, DI
-	SHRQ  $0x34, DI
-	SHLQ  $0x18, BX
-	IMULQ R8, BX
-	SHRQ  $0x34, BX
-	LEAL  -2(CX), R8
-	LEAQ  24(SP)(BX*4), R9
-	MOVL  (R9), BX
-	MOVL  R8, 24(SP)(DI*4)
-	MOVL  CX, (R9)
-	CMPL  (DX)(BX*1), SI
+	MOVQ  $0x000000cf1bbcdcbb, R9
+	MOVQ  DI, R8
+	SHRQ  $0x10, DI
+	MOVQ  DI, SI
+	SHLQ  $0x18, R8
+	IMULQ R9, R8
+	SHRQ  $0x34, R8
+	SHLQ  $0x18, SI
+	IMULQ R9, SI
+	SHRQ  $0x34, SI
+	LEAL  -2(DX), R9
+	LEAQ  (AX)(SI*4), R10
+	MOVL  (R10), SI
+	MOVL  R9, (AX)(R8*4)
+	MOVL  DX, (R10)
+	CMPL  (BX)(SI*1), DI
 	JEQ   match_nolit_loop_encodeSnappyBlockAsm12B
-	INCL  CX
+	INCL  DX
 	JMP   search_loop_encodeSnappyBlockAsm12B
 
 emit_remainder_encodeSnappyBlockAsm12B:
-	MOVQ src_len+32(FP), CX
-	SUBL 12(SP), CX
-	LEAQ 3(AX)(CX*1), CX
-	CMPQ CX, (SP)
+	MOVQ src_len+32(FP), AX
+	SUBL 12(SP), AX
+	LEAQ 3(CX)(AX*1), AX
+	CMPQ AX, (SP)
 	JB   emit_remainder_ok_encodeSnappyBlockAsm12B
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 emit_remainder_ok_encodeSnappyBlockAsm12B:
-	MOVQ src_len+32(FP), CX
-	MOVL 12(SP), BX
-	CMPL BX, CX
+	MOVQ src_len+32(FP), AX
+	MOVL 12(SP), DX
+	CMPL DX, AX
 	JEQ  emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B
-	MOVL CX, SI
-	MOVL CX, 12(SP)
-	LEAQ (DX)(BX*1), CX
-	SUBL BX, SI
+	MOVL AX, SI
+	MOVL AX, 12(SP)
+	LEAQ (BX)(DX*1), AX
+	SUBL DX, SI
 	LEAL -1(SI), DX
 	CMPL DX, $0x3c
 	JB   one_byte_emit_remainder_encodeSnappyBlockAsm12B
@@ -13181,26 +13194,26 @@ emit_remainder_ok_encodeSnappyBlockAsm12B:
 	JB   three_bytes_emit_remainder_encodeSnappyBlockAsm12B
 
 three_bytes_emit_remainder_encodeSnappyBlockAsm12B:
-	MOVB $0xf4, (AX)
-	MOVW DX, 1(AX)
-	ADDQ $0x03, AX
+	MOVB $0xf4, (CX)
+	MOVW DX, 1(CX)
+	ADDQ $0x03, CX
 	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm12B
 
 two_bytes_emit_remainder_encodeSnappyBlockAsm12B:
-	MOVB $0xf0, (AX)
-	MOVB DL, 1(AX)
-	ADDQ $0x02, AX
+	MOVB $0xf0, (CX)
+	MOVB DL, 1(CX)
+	ADDQ $0x02, CX
 	CMPL DX, $0x40
 	JB   memmove_emit_remainder_encodeSnappyBlockAsm12B
 	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm12B
 
 one_byte_emit_remainder_encodeSnappyBlockAsm12B:
 	SHLB $0x02, DL
-	MOVB DL, (AX)
-	ADDQ $0x01, AX
+	MOVB DL, (CX)
+	ADDQ $0x01, CX
 
 memmove_emit_remainder_encodeSnappyBlockAsm12B:
-	LEAQ (AX)(SI*1), DX
+	LEAQ (CX)(SI*1), DX
 	MOVL SI, BX
 
 	// genMemMoveShort
@@ -13216,73 +13229,73 @@ memmove_emit_remainder_encodeSnappyBlockAsm12B:
 	JMP  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_33through64
 
 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_1or2:
-	MOVB (CX), SI
-	MOVB -1(CX)(BX*1), CL
-	MOVB SI, (AX)
-	MOVB CL, -1(AX)(BX*1)
+	MOVB (AX), SI
+	MOVB -1(AX)(BX*1), AL
+	MOVB SI, (CX)
+	MOVB AL, -1(CX)(BX*1)
 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B
 
 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_3:
-	MOVW (CX), SI
-	MOVB 2(CX), CL
-	MOVW SI, (AX)
-	MOVB CL, 2(AX)
+	MOVW (AX), SI
+	MOVB 2(AX), AL
+	MOVW SI, (CX)
+	MOVB AL, 2(CX)
 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B
 
 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_4through7:
-	MOVL (CX), SI
-	MOVL -4(CX)(BX*1), CX
-	MOVL SI, (AX)
-	MOVL CX, -4(AX)(BX*1)
+	MOVL (AX), SI
+	MOVL -4(AX)(BX*1), AX
+	MOVL SI, (CX)
+	MOVL AX, -4(CX)(BX*1)
 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B
 
 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_8through16:
-	MOVQ (CX), SI
-	MOVQ -8(CX)(BX*1), CX
-	MOVQ SI, (AX)
-	MOVQ CX, -8(AX)(BX*1)
+	MOVQ (AX), SI
+	MOVQ -8(AX)(BX*1), AX
+	MOVQ SI, (CX)
+	MOVQ AX, -8(CX)(BX*1)
 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B
 
 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_17through32:
-	MOVOU (CX), X0
-	MOVOU -16(CX)(BX*1), X1
-	MOVOU X0, (AX)
-	MOVOU X1, -16(AX)(BX*1)
+	MOVOU (AX), X0
+	MOVOU -16(AX)(BX*1), X1
+	MOVOU X0, (CX)
+	MOVOU X1, -16(CX)(BX*1)
 	JMP   memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B
 
 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm12B_memmove_move_33through64:
-	MOVOU (CX), X0
-	MOVOU 16(CX), X1
-	MOVOU -32(CX)(BX*1), X2
-	MOVOU -16(CX)(BX*1), X3
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(BX*1)
-	MOVOU X3, -16(AX)(BX*1)
+	MOVOU (AX), X0
+	MOVOU 16(AX), X1
+	MOVOU -32(AX)(BX*1), X2
+	MOVOU -16(AX)(BX*1), X3
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(BX*1)
+	MOVOU X3, -16(CX)(BX*1)
 
 memmove_end_copy_emit_remainder_encodeSnappyBlockAsm12B:
-	MOVQ DX, AX
+	MOVQ DX, CX
 	JMP  emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B
 
 memmove_long_emit_remainder_encodeSnappyBlockAsm12B:
-	LEAQ (AX)(SI*1), DX
+	LEAQ (CX)(SI*1), DX
 	MOVL SI, BX
 
 	// genMemMoveLong
-	MOVOU (CX), X0
-	MOVOU 16(CX), X1
-	MOVOU -32(CX)(BX*1), X2
-	MOVOU -16(CX)(BX*1), X3
+	MOVOU (AX), X0
+	MOVOU 16(AX), X1
+	MOVOU -32(AX)(BX*1), X2
+	MOVOU -16(AX)(BX*1), X3
 	MOVQ  BX, DI
 	SHRQ  $0x05, DI
-	MOVQ  AX, SI
+	MOVQ  CX, SI
 	ANDL  $0x0000001f, SI
 	MOVQ  $0x00000040, R8
 	SUBQ  SI, R8
 	DECQ  DI
 	JA    emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
-	LEAQ  -32(CX)(R8*1), SI
-	LEAQ  -32(AX)(R8*1), R9
+	LEAQ  -32(AX)(R8*1), SI
+	LEAQ  -32(CX)(R8*1), R9
 
 emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_big_loop_back:
 	MOVOU (SI), X4
@@ -13296,718 +13309,719 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_big_loop_back:
 	JNA   emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_big_loop_back
 
 emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32:
-	MOVOU -32(CX)(R8*1), X4
-	MOVOU -16(CX)(R8*1), X5
-	MOVOA X4, -32(AX)(R8*1)
-	MOVOA X5, -16(AX)(R8*1)
+	MOVOU -32(AX)(R8*1), X4
+	MOVOU -16(AX)(R8*1), X5
+	MOVOA X4, -32(CX)(R8*1)
+	MOVOA X5, -16(CX)(R8*1)
 	ADDQ  $0x20, R8
 	CMPQ  BX, R8
 	JAE   emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm12Blarge_forward_sse_loop_32
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(BX*1)
-	MOVOU X3, -16(AX)(BX*1)
-	MOVQ  DX, AX
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(BX*1)
+	MOVOU X3, -16(CX)(BX*1)
+	MOVQ  DX, CX
 
 emit_literal_done_emit_remainder_encodeSnappyBlockAsm12B:
-	MOVQ dst_base+0(FP), CX
-	SUBQ CX, AX
-	MOVQ AX, ret+48(FP)
+	MOVQ dst_base+0(FP), AX
+	SUBQ AX, CX
+	MOVQ CX, ret+56(FP)
 	RET
 
-// func encodeSnappyBlockAsm10B(dst []byte, src []byte) int
+// func encodeSnappyBlockAsm10B(dst []byte, src []byte, tmp *[4096]byte) int
 // Requires: BMI, SSE2
-TEXT ·encodeSnappyBlockAsm10B(SB), $4120-56
-	MOVQ dst_base+0(FP), AX
-	MOVQ $0x00000020, CX
-	LEAQ 24(SP), DX
+TEXT ·encodeSnappyBlockAsm10B(SB), $24-64
+	MOVQ tmp+48(FP), AX
+	MOVQ dst_base+0(FP), CX
+	MOVQ $0x00000020, DX
+	MOVQ AX, BX
 	PXOR X0, X0
 
 zero_loop_encodeSnappyBlockAsm10B:
-	MOVOU X0, (DX)
-	MOVOU X0, 16(DX)
-	MOVOU X0, 32(DX)
-	MOVOU X0, 48(DX)
-	MOVOU X0, 64(DX)
-	MOVOU X0, 80(DX)
-	MOVOU X0, 96(DX)
-	MOVOU X0, 112(DX)
-	ADDQ  $0x80, DX
-	DECQ  CX
+	MOVOU X0, (BX)
+	MOVOU X0, 16(BX)
+	MOVOU X0, 32(BX)
+	MOVOU X0, 48(BX)
+	MOVOU X0, 64(BX)
+	MOVOU X0, 80(BX)
+	MOVOU X0, 96(BX)
+	MOVOU X0, 112(BX)
+	ADDQ  $0x80, BX
+	DECQ  DX
 	JNZ   zero_loop_encodeSnappyBlockAsm10B
 	MOVL  $0x00000000, 12(SP)
-	MOVQ  src_len+32(FP), CX
-	LEAQ  -9(CX), DX
-	LEAQ  -8(CX), BX
-	MOVL  BX, 8(SP)
-	SHRQ  $0x05, CX
-	SUBL  CX, DX
-	LEAQ  (AX)(DX*1), DX
-	MOVQ  DX, (SP)
-	MOVL  $0x00000001, CX
-	MOVL  CX, 16(SP)
-	MOVQ  src_base+24(FP), DX
+	MOVQ  src_len+32(FP), DX
+	LEAQ  -9(DX), BX
+	LEAQ  -8(DX), SI
+	MOVL  SI, 8(SP)
+	SHRQ  $0x05, DX
+	SUBL  DX, BX
+	LEAQ  (CX)(BX*1), BX
+	MOVQ  BX, (SP)
+	MOVL  $0x00000001, DX
+	MOVL  DX, 16(SP)
+	MOVQ  src_base+24(FP), BX
 
 search_loop_encodeSnappyBlockAsm10B:
-	MOVL  CX, BX
-	SUBL  12(SP), BX
-	SHRL  $0x05, BX
-	LEAL  4(CX)(BX*1), BX
-	CMPL  BX, 8(SP)
+	MOVL  DX, SI
+	SUBL  12(SP), SI
+	SHRL  $0x05, SI
+	LEAL  4(DX)(SI*1), SI
+	CMPL  SI, 8(SP)
 	JAE   emit_remainder_encodeSnappyBlockAsm10B
-	MOVQ  (DX)(CX*1), SI
-	MOVL  BX, 20(SP)
-	MOVQ  $0x9e3779b1, R8
-	MOVQ  SI, R9
-	MOVQ  SI, R10
-	SHRQ  $0x08, R10
-	SHLQ  $0x20, R9
-	IMULQ R8, R9
-	SHRQ  $0x36, R9
+	MOVQ  (BX)(DX*1), DI
+	MOVL  SI, 20(SP)
+	MOVQ  $0x9e3779b1, R9
+	MOVQ  DI, R10
+	MOVQ  DI, R11
+	SHRQ  $0x08, R11
 	SHLQ  $0x20, R10
-	IMULQ R8, R10
+	IMULQ R9, R10
 	SHRQ  $0x36, R10
-	MOVL  24(SP)(R9*4), BX
-	MOVL  24(SP)(R10*4), DI
-	MOVL  CX, 24(SP)(R9*4)
-	LEAL  1(CX), R9
-	MOVL  R9, 24(SP)(R10*4)
-	MOVQ  SI, R9
-	SHRQ  $0x10, R9
-	SHLQ  $0x20, R9
-	IMULQ R8, R9
-	SHRQ  $0x36, R9
-	MOVL  CX, R8
-	SUBL  16(SP), R8
-	MOVL  1(DX)(R8*1), R10
-	MOVQ  SI, R8
-	SHRQ  $0x08, R8
-	CMPL  R8, R10
+	SHLQ  $0x20, R11
+	IMULQ R9, R11
+	SHRQ  $0x36, R11
+	MOVL  (AX)(R10*4), SI
+	MOVL  (AX)(R11*4), R8
+	MOVL  DX, (AX)(R10*4)
+	LEAL  1(DX), R10
+	MOVL  R10, (AX)(R11*4)
+	MOVQ  DI, R10
+	SHRQ  $0x10, R10
+	SHLQ  $0x20, R10
+	IMULQ R9, R10
+	SHRQ  $0x36, R10
+	MOVL  DX, R9
+	SUBL  16(SP), R9
+	MOVL  1(BX)(R9*1), R11
+	MOVQ  DI, R9
+	SHRQ  $0x08, R9
+	CMPL  R9, R11
 	JNE   no_repeat_found_encodeSnappyBlockAsm10B
-	LEAL  1(CX), SI
-	MOVL  12(SP), BX
-	MOVL  SI, DI
-	SUBL  16(SP), DI
+	LEAL  1(DX), DI
+	MOVL  12(SP), SI
+	MOVL  DI, R8
+	SUBL  16(SP), R8
 	JZ    repeat_extend_back_end_encodeSnappyBlockAsm10B
 
 repeat_extend_back_loop_encodeSnappyBlockAsm10B:
-	CMPL SI, BX
+	CMPL DI, SI
 	JBE  repeat_extend_back_end_encodeSnappyBlockAsm10B
-	MOVB -1(DX)(DI*1), R8
-	MOVB -1(DX)(SI*1), R9
-	CMPB R8, R9
+	MOVB -1(BX)(R8*1), R9
+	MOVB -1(BX)(DI*1), R10
+	CMPB R9, R10
 	JNE  repeat_extend_back_end_encodeSnappyBlockAsm10B
-	LEAL -1(SI), SI
-	DECL DI
+	LEAL -1(DI), DI
+	DECL R8
 	JNZ  repeat_extend_back_loop_encodeSnappyBlockAsm10B
 
 repeat_extend_back_end_encodeSnappyBlockAsm10B:
-	MOVL SI, BX
-	SUBL 12(SP), BX
-	LEAQ 3(AX)(BX*1), BX
-	CMPQ BX, (SP)
+	MOVL DI, SI
+	SUBL 12(SP), SI
+	LEAQ 3(CX)(SI*1), SI
+	CMPQ SI, (SP)
 	JB   repeat_dst_size_check_encodeSnappyBlockAsm10B
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 repeat_dst_size_check_encodeSnappyBlockAsm10B:
-	MOVL 12(SP), BX
-	CMPL BX, SI
+	MOVL 12(SP), SI
+	CMPL SI, DI
 	JEQ  emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B
-	MOVL SI, DI
-	MOVL SI, 12(SP)
-	LEAQ (DX)(BX*1), R8
-	SUBL BX, DI
-	LEAL -1(DI), BX
-	CMPL BX, $0x3c
+	MOVL DI, R8
+	MOVL DI, 12(SP)
+	LEAQ (BX)(SI*1), R9
+	SUBL SI, R8
+	LEAL -1(R8), SI
+	CMPL SI, $0x3c
 	JB   one_byte_repeat_emit_encodeSnappyBlockAsm10B
-	CMPL BX, $0x00000100
+	CMPL SI, $0x00000100
 	JB   two_bytes_repeat_emit_encodeSnappyBlockAsm10B
 	JB   three_bytes_repeat_emit_encodeSnappyBlockAsm10B
 
 three_bytes_repeat_emit_encodeSnappyBlockAsm10B:
-	MOVB $0xf4, (AX)
-	MOVW BX, 1(AX)
-	ADDQ $0x03, AX
+	MOVB $0xf4, (CX)
+	MOVW SI, 1(CX)
+	ADDQ $0x03, CX
 	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm10B
 
 two_bytes_repeat_emit_encodeSnappyBlockAsm10B:
-	MOVB $0xf0, (AX)
-	MOVB BL, 1(AX)
-	ADDQ $0x02, AX
-	CMPL BX, $0x40
+	MOVB $0xf0, (CX)
+	MOVB SI, 1(CX)
+	ADDQ $0x02, CX
+	CMPL SI, $0x40
 	JB   memmove_repeat_emit_encodeSnappyBlockAsm10B
 	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm10B
 
 one_byte_repeat_emit_encodeSnappyBlockAsm10B:
-	SHLB $0x02, BL
-	MOVB BL, (AX)
-	ADDQ $0x01, AX
+	SHLB $0x02, SI
+	MOVB SI, (CX)
+	ADDQ $0x01, CX
 
 memmove_repeat_emit_encodeSnappyBlockAsm10B:
-	LEAQ (AX)(DI*1), BX
+	LEAQ (CX)(R8*1), SI
 
 	// genMemMoveShort
-	CMPQ DI, $0x08
+	CMPQ R8, $0x08
 	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8
-	CMPQ DI, $0x10
+	CMPQ R8, $0x10
 	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8through16
-	CMPQ DI, $0x20
+	CMPQ R8, $0x20
 	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_17through32
 	JMP  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_33through64
 
 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8:
-	MOVQ (R8), R9
-	MOVQ R9, (AX)
+	MOVQ (R9), R10
+	MOVQ R10, (CX)
 	JMP  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B
 
 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_8through16:
-	MOVQ (R8), R9
-	MOVQ -8(R8)(DI*1), R8
-	MOVQ R9, (AX)
-	MOVQ R8, -8(AX)(DI*1)
+	MOVQ (R9), R10
+	MOVQ -8(R9)(R8*1), R9
+	MOVQ R10, (CX)
+	MOVQ R9, -8(CX)(R8*1)
 	JMP  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B
 
 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_17through32:
-	MOVOU (R8), X0
-	MOVOU -16(R8)(DI*1), X1
-	MOVOU X0, (AX)
-	MOVOU X1, -16(AX)(DI*1)
+	MOVOU (R9), X0
+	MOVOU -16(R9)(R8*1), X1
+	MOVOU X0, (CX)
+	MOVOU X1, -16(CX)(R8*1)
 	JMP   memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B
 
 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm10B_memmove_move_33through64:
-	MOVOU (R8), X0
-	MOVOU 16(R8), X1
-	MOVOU -32(R8)(DI*1), X2
-	MOVOU -16(R8)(DI*1), X3
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(DI*1)
-	MOVOU X3, -16(AX)(DI*1)
+	MOVOU (R9), X0
+	MOVOU 16(R9), X1
+	MOVOU -32(R9)(R8*1), X2
+	MOVOU -16(R9)(R8*1), X3
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R8*1)
+	MOVOU X3, -16(CX)(R8*1)
 
 memmove_end_copy_repeat_emit_encodeSnappyBlockAsm10B:
-	MOVQ BX, AX
+	MOVQ SI, CX
 	JMP  emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B
 
 memmove_long_repeat_emit_encodeSnappyBlockAsm10B:
-	LEAQ (AX)(DI*1), BX
+	LEAQ (CX)(R8*1), SI
 
 	// genMemMoveLong
-	MOVOU (R8), X0
-	MOVOU 16(R8), X1
-	MOVOU -32(R8)(DI*1), X2
-	MOVOU -16(R8)(DI*1), X3
-	MOVQ  DI, R10
-	SHRQ  $0x05, R10
-	MOVQ  AX, R9
-	ANDL  $0x0000001f, R9
-	MOVQ  $0x00000040, R11
-	SUBQ  R9, R11
-	DECQ  R10
+	MOVOU (R9), X0
+	MOVOU 16(R9), X1
+	MOVOU -32(R9)(R8*1), X2
+	MOVOU -16(R9)(R8*1), X3
+	MOVQ  R8, R11
+	SHRQ  $0x05, R11
+	MOVQ  CX, R10
+	ANDL  $0x0000001f, R10
+	MOVQ  $0x00000040, R12
+	SUBQ  R10, R12
+	DECQ  R11
 	JA    emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
-	LEAQ  -32(R8)(R11*1), R9
-	LEAQ  -32(AX)(R11*1), R12
+	LEAQ  -32(R9)(R12*1), R10
+	LEAQ  -32(CX)(R12*1), R13
 
 emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_big_loop_back:
-	MOVOU (R9), X4
-	MOVOU 16(R9), X5
-	MOVOA X4, (R12)
-	MOVOA X5, 16(R12)
+	MOVOU (R10), X4
+	MOVOU 16(R10), X5
+	MOVOA X4, (R13)
+	MOVOA X5, 16(R13)
+	ADDQ  $0x20, R13
+	ADDQ  $0x20, R10
 	ADDQ  $0x20, R12
-	ADDQ  $0x20, R9
-	ADDQ  $0x20, R11
-	DECQ  R10
+	DECQ  R11
 	JNA   emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_big_loop_back
 
 emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32:
-	MOVOU -32(R8)(R11*1), X4
-	MOVOU -16(R8)(R11*1), X5
-	MOVOA X4, -32(AX)(R11*1)
-	MOVOA X5, -16(AX)(R11*1)
-	ADDQ  $0x20, R11
-	CMPQ  DI, R11
+	MOVOU -32(R9)(R12*1), X4
+	MOVOU -16(R9)(R12*1), X5
+	MOVOA X4, -32(CX)(R12*1)
+	MOVOA X5, -16(CX)(R12*1)
+	ADDQ  $0x20, R12
+	CMPQ  R8, R12
 	JAE   emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(DI*1)
-	MOVOU X3, -16(AX)(DI*1)
-	MOVQ  BX, AX
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R8*1)
+	MOVOU X3, -16(CX)(R8*1)
+	MOVQ  SI, CX
 
 emit_literal_done_repeat_emit_encodeSnappyBlockAsm10B:
-	ADDL $0x05, CX
-	MOVL CX, BX
-	SUBL 16(SP), BX
-	MOVQ src_len+32(FP), DI
-	SUBL CX, DI
-	LEAQ (DX)(CX*1), R8
-	LEAQ (DX)(BX*1), BX
+	ADDL $0x05, DX
+	MOVL DX, SI
+	SUBL 16(SP), SI
+	MOVQ src_len+32(FP), R8
+	SUBL DX, R8
+	LEAQ (BX)(DX*1), R9
+	LEAQ (BX)(SI*1), SI
 
 	// matchLen
-	XORL R10, R10
+	XORL R11, R11
 
 matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm10B:
-	CMPL DI, $0x10
+	CMPL R8, $0x10
 	JB   matchlen_match8_repeat_extend_encodeSnappyBlockAsm10B
-	MOVQ (R8)(R10*1), R9
-	MOVQ 8(R8)(R10*1), R11
-	XORQ (BX)(R10*1), R9
+	MOVQ (R9)(R11*1), R10
+	MOVQ 8(R9)(R11*1), R12
+	XORQ (SI)(R11*1), R10
 	JNZ  matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm10B
-	XORQ 8(BX)(R10*1), R11
+	XORQ 8(SI)(R11*1), R12
 	JNZ  matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm10B
-	LEAL -16(DI), DI
-	LEAL 16(R10), R10
+	LEAL -16(R8), R8
+	LEAL 16(R11), R11
 	JMP  matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm10B
 
 matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm10B:
 #ifdef GOAMD64_v3
-	TZCNTQ R11, R11
+	TZCNTQ R12, R12
 
 #else
-	BSFQ R11, R11
+	BSFQ R12, R12
 
 #endif
-	SARQ $0x03, R11
-	LEAL 8(R10)(R11*1), R10
+	SARQ $0x03, R12
+	LEAL 8(R11)(R12*1), R11
 	JMP  repeat_extend_forward_end_encodeSnappyBlockAsm10B
 
 matchlen_match8_repeat_extend_encodeSnappyBlockAsm10B:
-	CMPL DI, $0x08
+	CMPL R8, $0x08
 	JB   matchlen_match4_repeat_extend_encodeSnappyBlockAsm10B
-	MOVQ (R8)(R10*1), R9
-	XORQ (BX)(R10*1), R9
+	MOVQ (R9)(R11*1), R10
+	XORQ (SI)(R11*1), R10
 	JNZ  matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm10B
-	LEAL -8(DI), DI
-	LEAL 8(R10), R10
+	LEAL -8(R8), R8
+	LEAL 8(R11), R11
 	JMP  matchlen_match4_repeat_extend_encodeSnappyBlockAsm10B
 
 matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm10B:
 #ifdef GOAMD64_v3
-	TZCNTQ R9, R9
+	TZCNTQ R10, R10
 
 #else
-	BSFQ R9, R9
+	BSFQ R10, R10
 
 #endif
-	SARQ $0x03, R9
-	LEAL (R10)(R9*1), R10
+	SARQ $0x03, R10
+	LEAL (R11)(R10*1), R11
 	JMP  repeat_extend_forward_end_encodeSnappyBlockAsm10B
 
 matchlen_match4_repeat_extend_encodeSnappyBlockAsm10B:
-	CMPL DI, $0x04
+	CMPL R8, $0x04
 	JB   matchlen_match2_repeat_extend_encodeSnappyBlockAsm10B
-	MOVL (R8)(R10*1), R9
-	CMPL (BX)(R10*1), R9
+	MOVL (R9)(R11*1), R10
+	CMPL (SI)(R11*1), R10
 	JNE  matchlen_match2_repeat_extend_encodeSnappyBlockAsm10B
-	LEAL -4(DI), DI
-	LEAL 4(R10), R10
+	LEAL -4(R8), R8
+	LEAL 4(R11), R11
 
 matchlen_match2_repeat_extend_encodeSnappyBlockAsm10B:
-	CMPL DI, $0x01
+	CMPL R8, $0x01
 	JE   matchlen_match1_repeat_extend_encodeSnappyBlockAsm10B
 	JB   repeat_extend_forward_end_encodeSnappyBlockAsm10B
-	MOVW (R8)(R10*1), R9
-	CMPW (BX)(R10*1), R9
+	MOVW (R9)(R11*1), R10
+	CMPW (SI)(R11*1), R10
 	JNE  matchlen_match1_repeat_extend_encodeSnappyBlockAsm10B
-	LEAL 2(R10), R10
-	SUBL $0x02, DI
+	LEAL 2(R11), R11
+	SUBL $0x02, R8
 	JZ   repeat_extend_forward_end_encodeSnappyBlockAsm10B
 
 matchlen_match1_repeat_extend_encodeSnappyBlockAsm10B:
-	MOVB (R8)(R10*1), R9
-	CMPB (BX)(R10*1), R9
+	MOVB (R9)(R11*1), R10
+	CMPB (SI)(R11*1), R10
 	JNE  repeat_extend_forward_end_encodeSnappyBlockAsm10B
-	LEAL 1(R10), R10
+	LEAL 1(R11), R11
 
 repeat_extend_forward_end_encodeSnappyBlockAsm10B:
-	ADDL R10, CX
-	MOVL CX, BX
-	SUBL SI, BX
-	MOVL 16(SP), SI
+	ADDL R11, DX
+	MOVL DX, SI
+	SUBL DI, SI
+	MOVL 16(SP), DI
 
 	// emitCopy
 two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm10B:
-	CMPL BX, $0x40
+	CMPL SI, $0x40
 	JBE  two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm10B
-	MOVB $0xee, (AX)
-	MOVW SI, 1(AX)
-	LEAL -60(BX), BX
-	ADDQ $0x03, AX
+	MOVB $0xee, (CX)
+	MOVW DI, 1(CX)
+	LEAL -60(SI), SI
+	ADDQ $0x03, CX
 	JMP  two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm10B
 
 two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm10B:
-	MOVL BX, DI
-	SHLL $0x02, DI
-	CMPL BX, $0x0c
+	MOVL SI, R8
+	SHLL $0x02, R8
+	CMPL SI, $0x0c
 	JAE  emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B
-	CMPL SI, $0x00000800
+	CMPL DI, $0x00000800
 	JAE  emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B
-	LEAL -15(DI), DI
-	MOVB SI, 1(AX)
-	SHRL $0x08, SI
-	SHLL $0x05, SI
-	ORL  SI, DI
-	MOVB DI, (AX)
-	ADDQ $0x02, AX
+	LEAL -15(R8), R8
+	MOVB DI, 1(CX)
+	SHRL $0x08, DI
+	SHLL $0x05, DI
+	ORL  DI, R8
+	MOVB R8, (CX)
+	ADDQ $0x02, CX
 	JMP  repeat_end_emit_encodeSnappyBlockAsm10B
 
 emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm10B:
-	LEAL -2(DI), DI
-	MOVB DI, (AX)
-	MOVW SI, 1(AX)
-	ADDQ $0x03, AX
+	LEAL -2(R8), R8
+	MOVB R8, (CX)
+	MOVW DI, 1(CX)
+	ADDQ $0x03, CX
 
 repeat_end_emit_encodeSnappyBlockAsm10B:
-	MOVL CX, 12(SP)
+	MOVL DX, 12(SP)
 	JMP  search_loop_encodeSnappyBlockAsm10B
 
 no_repeat_found_encodeSnappyBlockAsm10B:
-	CMPL (DX)(BX*1), SI
+	CMPL (BX)(SI*1), DI
 	JEQ  candidate_match_encodeSnappyBlockAsm10B
-	SHRQ $0x08, SI
-	MOVL 24(SP)(R9*4), BX
-	LEAL 2(CX), R8
-	CMPL (DX)(DI*1), SI
+	SHRQ $0x08, DI
+	MOVL (AX)(R10*4), SI
+	LEAL 2(DX), R9
+	CMPL (BX)(R8*1), DI
 	JEQ  candidate2_match_encodeSnappyBlockAsm10B
-	MOVL R8, 24(SP)(R9*4)
-	SHRQ $0x08, SI
-	CMPL (DX)(BX*1), SI
+	MOVL R9, (AX)(R10*4)
+	SHRQ $0x08, DI
+	CMPL (BX)(SI*1), DI
 	JEQ  candidate3_match_encodeSnappyBlockAsm10B
-	MOVL 20(SP), CX
+	MOVL 20(SP), DX
 	JMP  search_loop_encodeSnappyBlockAsm10B
 
 candidate3_match_encodeSnappyBlockAsm10B:
-	ADDL $0x02, CX
+	ADDL $0x02, DX
 	JMP  candidate_match_encodeSnappyBlockAsm10B
 
 candidate2_match_encodeSnappyBlockAsm10B:
-	MOVL R8, 24(SP)(R9*4)
-	INCL CX
-	MOVL DI, BX
+	MOVL R9, (AX)(R10*4)
+	INCL DX
+	MOVL R8, SI
 
 candidate_match_encodeSnappyBlockAsm10B:
-	MOVL  12(SP), SI
-	TESTL BX, BX
+	MOVL  12(SP), DI
+	TESTL SI, SI
 	JZ    match_extend_back_end_encodeSnappyBlockAsm10B
 
 match_extend_back_loop_encodeSnappyBlockAsm10B:
-	CMPL CX, SI
+	CMPL DX, DI
 	JBE  match_extend_back_end_encodeSnappyBlockAsm10B
-	MOVB -1(DX)(BX*1), DI
-	MOVB -1(DX)(CX*1), R8
-	CMPB DI, R8
+	MOVB -1(BX)(SI*1), R8
+	MOVB -1(BX)(DX*1), R9
+	CMPB R8, R9
 	JNE  match_extend_back_end_encodeSnappyBlockAsm10B
-	LEAL -1(CX), CX
-	DECL BX
+	LEAL -1(DX), DX
+	DECL SI
 	JZ   match_extend_back_end_encodeSnappyBlockAsm10B
 	JMP  match_extend_back_loop_encodeSnappyBlockAsm10B
 
 match_extend_back_end_encodeSnappyBlockAsm10B:
-	MOVL CX, SI
-	SUBL 12(SP), SI
-	LEAQ 3(AX)(SI*1), SI
-	CMPQ SI, (SP)
+	MOVL DX, DI
+	SUBL 12(SP), DI
+	LEAQ 3(CX)(DI*1), DI
+	CMPQ DI, (SP)
 	JB   match_dst_size_check_encodeSnappyBlockAsm10B
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 match_dst_size_check_encodeSnappyBlockAsm10B:
-	MOVL CX, SI
-	MOVL 12(SP), DI
-	CMPL DI, SI
+	MOVL DX, DI
+	MOVL 12(SP), R8
+	CMPL R8, DI
 	JEQ  emit_literal_done_match_emit_encodeSnappyBlockAsm10B
-	MOVL SI, R8
-	MOVL SI, 12(SP)
-	LEAQ (DX)(DI*1), SI
-	SUBL DI, R8
-	LEAL -1(R8), DI
-	CMPL DI, $0x3c
+	MOVL DI, R9
+	MOVL DI, 12(SP)
+	LEAQ (BX)(R8*1), DI
+	SUBL R8, R9
+	LEAL -1(R9), R8
+	CMPL R8, $0x3c
 	JB   one_byte_match_emit_encodeSnappyBlockAsm10B
-	CMPL DI, $0x00000100
+	CMPL R8, $0x00000100
 	JB   two_bytes_match_emit_encodeSnappyBlockAsm10B
 	JB   three_bytes_match_emit_encodeSnappyBlockAsm10B
 
 three_bytes_match_emit_encodeSnappyBlockAsm10B:
-	MOVB $0xf4, (AX)
-	MOVW DI, 1(AX)
-	ADDQ $0x03, AX
+	MOVB $0xf4, (CX)
+	MOVW R8, 1(CX)
+	ADDQ $0x03, CX
 	JMP  memmove_long_match_emit_encodeSnappyBlockAsm10B
 
 two_bytes_match_emit_encodeSnappyBlockAsm10B:
-	MOVB $0xf0, (AX)
-	MOVB DI, 1(AX)
-	ADDQ $0x02, AX
-	CMPL DI, $0x40
+	MOVB $0xf0, (CX)
+	MOVB R8, 1(CX)
+	ADDQ $0x02, CX
+	CMPL R8, $0x40
 	JB   memmove_match_emit_encodeSnappyBlockAsm10B
 	JMP  memmove_long_match_emit_encodeSnappyBlockAsm10B
 
 one_byte_match_emit_encodeSnappyBlockAsm10B:
-	SHLB $0x02, DI
-	MOVB DI, (AX)
-	ADDQ $0x01, AX
+	SHLB $0x02, R8
+	MOVB R8, (CX)
+	ADDQ $0x01, CX
 
 memmove_match_emit_encodeSnappyBlockAsm10B:
-	LEAQ (AX)(R8*1), DI
+	LEAQ (CX)(R9*1), R8
 
 	// genMemMoveShort
-	CMPQ R8, $0x08
+	CMPQ R9, $0x08
 	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8
-	CMPQ R8, $0x10
+	CMPQ R9, $0x10
 	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8through16
-	CMPQ R8, $0x20
+	CMPQ R9, $0x20
 	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_17through32
 	JMP  emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_33through64
 
 emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8:
-	MOVQ (SI), R9
-	MOVQ R9, (AX)
+	MOVQ (DI), R10
+	MOVQ R10, (CX)
 	JMP  memmove_end_copy_match_emit_encodeSnappyBlockAsm10B
 
 emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_8through16:
-	MOVQ (SI), R9
-	MOVQ -8(SI)(R8*1), SI
-	MOVQ R9, (AX)
-	MOVQ SI, -8(AX)(R8*1)
+	MOVQ (DI), R10
+	MOVQ -8(DI)(R9*1), DI
+	MOVQ R10, (CX)
+	MOVQ DI, -8(CX)(R9*1)
 	JMP  memmove_end_copy_match_emit_encodeSnappyBlockAsm10B
 
 emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_17through32:
-	MOVOU (SI), X0
-	MOVOU -16(SI)(R8*1), X1
-	MOVOU X0, (AX)
-	MOVOU X1, -16(AX)(R8*1)
+	MOVOU (DI), X0
+	MOVOU -16(DI)(R9*1), X1
+	MOVOU X0, (CX)
+	MOVOU X1, -16(CX)(R9*1)
 	JMP   memmove_end_copy_match_emit_encodeSnappyBlockAsm10B
 
 emit_lit_memmove_match_emit_encodeSnappyBlockAsm10B_memmove_move_33through64:
-	MOVOU (SI), X0
-	MOVOU 16(SI), X1
-	MOVOU -32(SI)(R8*1), X2
-	MOVOU -16(SI)(R8*1), X3
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(R8*1)
-	MOVOU X3, -16(AX)(R8*1)
+	MOVOU (DI), X0
+	MOVOU 16(DI), X1
+	MOVOU -32(DI)(R9*1), X2
+	MOVOU -16(DI)(R9*1), X3
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R9*1)
+	MOVOU X3, -16(CX)(R9*1)
 
 memmove_end_copy_match_emit_encodeSnappyBlockAsm10B:
-	MOVQ DI, AX
+	MOVQ R8, CX
 	JMP  emit_literal_done_match_emit_encodeSnappyBlockAsm10B
 
 memmove_long_match_emit_encodeSnappyBlockAsm10B:
-	LEAQ (AX)(R8*1), DI
+	LEAQ (CX)(R9*1), R8
 
 	// genMemMoveLong
-	MOVOU (SI), X0
-	MOVOU 16(SI), X1
-	MOVOU -32(SI)(R8*1), X2
-	MOVOU -16(SI)(R8*1), X3
-	MOVQ  R8, R10
-	SHRQ  $0x05, R10
-	MOVQ  AX, R9
-	ANDL  $0x0000001f, R9
-	MOVQ  $0x00000040, R11
-	SUBQ  R9, R11
-	DECQ  R10
+	MOVOU (DI), X0
+	MOVOU 16(DI), X1
+	MOVOU -32(DI)(R9*1), X2
+	MOVOU -16(DI)(R9*1), X3
+	MOVQ  R9, R11
+	SHRQ  $0x05, R11
+	MOVQ  CX, R10
+	ANDL  $0x0000001f, R10
+	MOVQ  $0x00000040, R12
+	SUBQ  R10, R12
+	DECQ  R11
 	JA    emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
-	LEAQ  -32(SI)(R11*1), R9
-	LEAQ  -32(AX)(R11*1), R12
+	LEAQ  -32(DI)(R12*1), R10
+	LEAQ  -32(CX)(R12*1), R13
 
 emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_big_loop_back:
-	MOVOU (R9), X4
-	MOVOU 16(R9), X5
-	MOVOA X4, (R12)
-	MOVOA X5, 16(R12)
+	MOVOU (R10), X4
+	MOVOU 16(R10), X5
+	MOVOA X4, (R13)
+	MOVOA X5, 16(R13)
+	ADDQ  $0x20, R13
+	ADDQ  $0x20, R10
 	ADDQ  $0x20, R12
-	ADDQ  $0x20, R9
-	ADDQ  $0x20, R11
-	DECQ  R10
+	DECQ  R11
 	JNA   emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_big_loop_back
 
 emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32:
-	MOVOU -32(SI)(R11*1), X4
-	MOVOU -16(SI)(R11*1), X5
-	MOVOA X4, -32(AX)(R11*1)
-	MOVOA X5, -16(AX)(R11*1)
-	ADDQ  $0x20, R11
-	CMPQ  R8, R11
+	MOVOU -32(DI)(R12*1), X4
+	MOVOU -16(DI)(R12*1), X5
+	MOVOA X4, -32(CX)(R12*1)
+	MOVOA X5, -16(CX)(R12*1)
+	ADDQ  $0x20, R12
+	CMPQ  R9, R12
 	JAE   emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(R8*1)
-	MOVOU X3, -16(AX)(R8*1)
-	MOVQ  DI, AX
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R9*1)
+	MOVOU X3, -16(CX)(R9*1)
+	MOVQ  R8, CX
 
 emit_literal_done_match_emit_encodeSnappyBlockAsm10B:
 match_nolit_loop_encodeSnappyBlockAsm10B:
-	MOVL CX, SI
-	SUBL BX, SI
-	MOVL SI, 16(SP)
-	ADDL $0x04, CX
-	ADDL $0x04, BX
-	MOVQ src_len+32(FP), SI
-	SUBL CX, SI
-	LEAQ (DX)(CX*1), DI
-	LEAQ (DX)(BX*1), BX
+	MOVL DX, DI
+	SUBL SI, DI
+	MOVL DI, 16(SP)
+	ADDL $0x04, DX
+	ADDL $0x04, SI
+	MOVQ src_len+32(FP), DI
+	SUBL DX, DI
+	LEAQ (BX)(DX*1), R8
+	LEAQ (BX)(SI*1), SI
 
 	// matchLen
-	XORL R9, R9
+	XORL R10, R10
 
 matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm10B:
-	CMPL SI, $0x10
+	CMPL DI, $0x10
 	JB   matchlen_match8_match_nolit_encodeSnappyBlockAsm10B
-	MOVQ (DI)(R9*1), R8
-	MOVQ 8(DI)(R9*1), R10
-	XORQ (BX)(R9*1), R8
+	MOVQ (R8)(R10*1), R9
+	MOVQ 8(R8)(R10*1), R11
+	XORQ (SI)(R10*1), R9
 	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm10B
-	XORQ 8(BX)(R9*1), R10
+	XORQ 8(SI)(R10*1), R11
 	JNZ  matchlen_bsf_16match_nolit_encodeSnappyBlockAsm10B
-	LEAL -16(SI), SI
-	LEAL 16(R9), R9
+	LEAL -16(DI), DI
+	LEAL 16(R10), R10
 	JMP  matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm10B
 
 matchlen_bsf_16match_nolit_encodeSnappyBlockAsm10B:
 #ifdef GOAMD64_v3
-	TZCNTQ R10, R10
+	TZCNTQ R11, R11
 
 #else
-	BSFQ R10, R10
+	BSFQ R11, R11
 
 #endif
-	SARQ $0x03, R10
-	LEAL 8(R9)(R10*1), R9
+	SARQ $0x03, R11
+	LEAL 8(R10)(R11*1), R10
 	JMP  match_nolit_end_encodeSnappyBlockAsm10B
 
 matchlen_match8_match_nolit_encodeSnappyBlockAsm10B:
-	CMPL SI, $0x08
+	CMPL DI, $0x08
 	JB   matchlen_match4_match_nolit_encodeSnappyBlockAsm10B
-	MOVQ (DI)(R9*1), R8
-	XORQ (BX)(R9*1), R8
+	MOVQ (R8)(R10*1), R9
+	XORQ (SI)(R10*1), R9
 	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm10B
-	LEAL -8(SI), SI
-	LEAL 8(R9), R9
+	LEAL -8(DI), DI
+	LEAL 8(R10), R10
 	JMP  matchlen_match4_match_nolit_encodeSnappyBlockAsm10B
 
 matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm10B:
 #ifdef GOAMD64_v3
-	TZCNTQ R8, R8
+	TZCNTQ R9, R9
 
 #else
-	BSFQ R8, R8
+	BSFQ R9, R9
 
 #endif
-	SARQ $0x03, R8
-	LEAL (R9)(R8*1), R9
+	SARQ $0x03, R9
+	LEAL (R10)(R9*1), R10
 	JMP  match_nolit_end_encodeSnappyBlockAsm10B
 
 matchlen_match4_match_nolit_encodeSnappyBlockAsm10B:
-	CMPL SI, $0x04
+	CMPL DI, $0x04
 	JB   matchlen_match2_match_nolit_encodeSnappyBlockAsm10B
-	MOVL (DI)(R9*1), R8
-	CMPL (BX)(R9*1), R8
+	MOVL (R8)(R10*1), R9
+	CMPL (SI)(R10*1), R9
 	JNE  matchlen_match2_match_nolit_encodeSnappyBlockAsm10B
-	LEAL -4(SI), SI
-	LEAL 4(R9), R9
+	LEAL -4(DI), DI
+	LEAL 4(R10), R10
 
 matchlen_match2_match_nolit_encodeSnappyBlockAsm10B:
-	CMPL SI, $0x01
+	CMPL DI, $0x01
 	JE   matchlen_match1_match_nolit_encodeSnappyBlockAsm10B
 	JB   match_nolit_end_encodeSnappyBlockAsm10B
-	MOVW (DI)(R9*1), R8
-	CMPW (BX)(R9*1), R8
+	MOVW (R8)(R10*1), R9
+	CMPW (SI)(R10*1), R9
 	JNE  matchlen_match1_match_nolit_encodeSnappyBlockAsm10B
-	LEAL 2(R9), R9
-	SUBL $0x02, SI
+	LEAL 2(R10), R10
+	SUBL $0x02, DI
 	JZ   match_nolit_end_encodeSnappyBlockAsm10B
 
 matchlen_match1_match_nolit_encodeSnappyBlockAsm10B:
-	MOVB (DI)(R9*1), R8
-	CMPB (BX)(R9*1), R8
+	MOVB (R8)(R10*1), R9
+	CMPB (SI)(R10*1), R9
 	JNE  match_nolit_end_encodeSnappyBlockAsm10B
-	LEAL 1(R9), R9
+	LEAL 1(R10), R10
 
 match_nolit_end_encodeSnappyBlockAsm10B:
-	ADDL R9, CX
-	MOVL 16(SP), BX
-	ADDL $0x04, R9
-	MOVL CX, 12(SP)
+	ADDL R10, DX
+	MOVL 16(SP), SI
+	ADDL $0x04, R10
+	MOVL DX, 12(SP)
 
 	// emitCopy
 two_byte_offset_match_nolit_encodeSnappyBlockAsm10B:
-	CMPL R9, $0x40
+	CMPL R10, $0x40
 	JBE  two_byte_offset_short_match_nolit_encodeSnappyBlockAsm10B
-	MOVB $0xee, (AX)
-	MOVW BX, 1(AX)
-	LEAL -60(R9), R9
-	ADDQ $0x03, AX
+	MOVB $0xee, (CX)
+	MOVW SI, 1(CX)
+	LEAL -60(R10), R10
+	ADDQ $0x03, CX
 	JMP  two_byte_offset_match_nolit_encodeSnappyBlockAsm10B
 
 two_byte_offset_short_match_nolit_encodeSnappyBlockAsm10B:
-	MOVL R9, SI
-	SHLL $0x02, SI
-	CMPL R9, $0x0c
+	MOVL R10, DI
+	SHLL $0x02, DI
+	CMPL R10, $0x0c
 	JAE  emit_copy_three_match_nolit_encodeSnappyBlockAsm10B
-	CMPL BX, $0x00000800
+	CMPL SI, $0x00000800
 	JAE  emit_copy_three_match_nolit_encodeSnappyBlockAsm10B
-	LEAL -15(SI), SI
-	MOVB BL, 1(AX)
-	SHRL $0x08, BX
-	SHLL $0x05, BX
-	ORL  BX, SI
-	MOVB SI, (AX)
-	ADDQ $0x02, AX
+	LEAL -15(DI), DI
+	MOVB SI, 1(CX)
+	SHRL $0x08, SI
+	SHLL $0x05, SI
+	ORL  SI, DI
+	MOVB DI, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeSnappyBlockAsm10B
 
 emit_copy_three_match_nolit_encodeSnappyBlockAsm10B:
-	LEAL -2(SI), SI
-	MOVB SI, (AX)
-	MOVW BX, 1(AX)
-	ADDQ $0x03, AX
+	LEAL -2(DI), DI
+	MOVB DI, (CX)
+	MOVW SI, 1(CX)
+	ADDQ $0x03, CX
 
 match_nolit_emitcopy_end_encodeSnappyBlockAsm10B:
-	CMPL CX, 8(SP)
+	CMPL DX, 8(SP)
 	JAE  emit_remainder_encodeSnappyBlockAsm10B
-	MOVQ -2(DX)(CX*1), SI
-	CMPQ AX, (SP)
+	MOVQ -2(BX)(DX*1), DI
+	CMPQ CX, (SP)
 	JB   match_nolit_dst_ok_encodeSnappyBlockAsm10B
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 match_nolit_dst_ok_encodeSnappyBlockAsm10B:
-	MOVQ  $0x9e3779b1, R8
-	MOVQ  SI, DI
-	SHRQ  $0x10, SI
-	MOVQ  SI, BX
-	SHLQ  $0x20, DI
-	IMULQ R8, DI
-	SHRQ  $0x36, DI
-	SHLQ  $0x20, BX
-	IMULQ R8, BX
-	SHRQ  $0x36, BX
-	LEAL  -2(CX), R8
-	LEAQ  24(SP)(BX*4), R9
-	MOVL  (R9), BX
-	MOVL  R8, 24(SP)(DI*4)
-	MOVL  CX, (R9)
-	CMPL  (DX)(BX*1), SI
+	MOVQ  $0x9e3779b1, R9
+	MOVQ  DI, R8
+	SHRQ  $0x10, DI
+	MOVQ  DI, SI
+	SHLQ  $0x20, R8
+	IMULQ R9, R8
+	SHRQ  $0x36, R8
+	SHLQ  $0x20, SI
+	IMULQ R9, SI
+	SHRQ  $0x36, SI
+	LEAL  -2(DX), R9
+	LEAQ  (AX)(SI*4), R10
+	MOVL  (R10), SI
+	MOVL  R9, (AX)(R8*4)
+	MOVL  DX, (R10)
+	CMPL  (BX)(SI*1), DI
 	JEQ   match_nolit_loop_encodeSnappyBlockAsm10B
-	INCL  CX
+	INCL  DX
 	JMP   search_loop_encodeSnappyBlockAsm10B
 
 emit_remainder_encodeSnappyBlockAsm10B:
-	MOVQ src_len+32(FP), CX
-	SUBL 12(SP), CX
-	LEAQ 3(AX)(CX*1), CX
-	CMPQ CX, (SP)
+	MOVQ src_len+32(FP), AX
+	SUBL 12(SP), AX
+	LEAQ 3(CX)(AX*1), AX
+	CMPQ AX, (SP)
 	JB   emit_remainder_ok_encodeSnappyBlockAsm10B
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 emit_remainder_ok_encodeSnappyBlockAsm10B:
-	MOVQ src_len+32(FP), CX
-	MOVL 12(SP), BX
-	CMPL BX, CX
+	MOVQ src_len+32(FP), AX
+	MOVL 12(SP), DX
+	CMPL DX, AX
 	JEQ  emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B
-	MOVL CX, SI
-	MOVL CX, 12(SP)
-	LEAQ (DX)(BX*1), CX
-	SUBL BX, SI
+	MOVL AX, SI
+	MOVL AX, 12(SP)
+	LEAQ (BX)(DX*1), AX
+	SUBL DX, SI
 	LEAL -1(SI), DX
 	CMPL DX, $0x3c
 	JB   one_byte_emit_remainder_encodeSnappyBlockAsm10B
@@ -14016,26 +14030,26 @@ emit_remainder_ok_encodeSnappyBlockAsm10B:
 	JB   three_bytes_emit_remainder_encodeSnappyBlockAsm10B
 
 three_bytes_emit_remainder_encodeSnappyBlockAsm10B:
-	MOVB $0xf4, (AX)
-	MOVW DX, 1(AX)
-	ADDQ $0x03, AX
+	MOVB $0xf4, (CX)
+	MOVW DX, 1(CX)
+	ADDQ $0x03, CX
 	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm10B
 
 two_bytes_emit_remainder_encodeSnappyBlockAsm10B:
-	MOVB $0xf0, (AX)
-	MOVB DL, 1(AX)
-	ADDQ $0x02, AX
+	MOVB $0xf0, (CX)
+	MOVB DL, 1(CX)
+	ADDQ $0x02, CX
 	CMPL DX, $0x40
 	JB   memmove_emit_remainder_encodeSnappyBlockAsm10B
 	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm10B
 
 one_byte_emit_remainder_encodeSnappyBlockAsm10B:
 	SHLB $0x02, DL
-	MOVB DL, (AX)
-	ADDQ $0x01, AX
+	MOVB DL, (CX)
+	ADDQ $0x01, CX
 
 memmove_emit_remainder_encodeSnappyBlockAsm10B:
-	LEAQ (AX)(SI*1), DX
+	LEAQ (CX)(SI*1), DX
 	MOVL SI, BX
 
 	// genMemMoveShort
@@ -14051,73 +14065,73 @@ memmove_emit_remainder_encodeSnappyBlockAsm10B:
 	JMP  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_33through64
 
 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_1or2:
-	MOVB (CX), SI
-	MOVB -1(CX)(BX*1), CL
-	MOVB SI, (AX)
-	MOVB CL, -1(AX)(BX*1)
+	MOVB (AX), SI
+	MOVB -1(AX)(BX*1), AL
+	MOVB SI, (CX)
+	MOVB AL, -1(CX)(BX*1)
 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B
 
 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_3:
-	MOVW (CX), SI
-	MOVB 2(CX), CL
-	MOVW SI, (AX)
-	MOVB CL, 2(AX)
+	MOVW (AX), SI
+	MOVB 2(AX), AL
+	MOVW SI, (CX)
+	MOVB AL, 2(CX)
 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B
 
 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_4through7:
-	MOVL (CX), SI
-	MOVL -4(CX)(BX*1), CX
-	MOVL SI, (AX)
-	MOVL CX, -4(AX)(BX*1)
+	MOVL (AX), SI
+	MOVL -4(AX)(BX*1), AX
+	MOVL SI, (CX)
+	MOVL AX, -4(CX)(BX*1)
 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B
 
 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_8through16:
-	MOVQ (CX), SI
-	MOVQ -8(CX)(BX*1), CX
-	MOVQ SI, (AX)
-	MOVQ CX, -8(AX)(BX*1)
+	MOVQ (AX), SI
+	MOVQ -8(AX)(BX*1), AX
+	MOVQ SI, (CX)
+	MOVQ AX, -8(CX)(BX*1)
 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B
 
 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_17through32:
-	MOVOU (CX), X0
-	MOVOU -16(CX)(BX*1), X1
-	MOVOU X0, (AX)
-	MOVOU X1, -16(AX)(BX*1)
+	MOVOU (AX), X0
+	MOVOU -16(AX)(BX*1), X1
+	MOVOU X0, (CX)
+	MOVOU X1, -16(CX)(BX*1)
 	JMP   memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B
 
 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm10B_memmove_move_33through64:
-	MOVOU (CX), X0
-	MOVOU 16(CX), X1
-	MOVOU -32(CX)(BX*1), X2
-	MOVOU -16(CX)(BX*1), X3
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(BX*1)
-	MOVOU X3, -16(AX)(BX*1)
+	MOVOU (AX), X0
+	MOVOU 16(AX), X1
+	MOVOU -32(AX)(BX*1), X2
+	MOVOU -16(AX)(BX*1), X3
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(BX*1)
+	MOVOU X3, -16(CX)(BX*1)
 
 memmove_end_copy_emit_remainder_encodeSnappyBlockAsm10B:
-	MOVQ DX, AX
+	MOVQ DX, CX
 	JMP  emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B
 
 memmove_long_emit_remainder_encodeSnappyBlockAsm10B:
-	LEAQ (AX)(SI*1), DX
+	LEAQ (CX)(SI*1), DX
 	MOVL SI, BX
 
 	// genMemMoveLong
-	MOVOU (CX), X0
-	MOVOU 16(CX), X1
-	MOVOU -32(CX)(BX*1), X2
-	MOVOU -16(CX)(BX*1), X3
+	MOVOU (AX), X0
+	MOVOU 16(AX), X1
+	MOVOU -32(AX)(BX*1), X2
+	MOVOU -16(AX)(BX*1), X3
 	MOVQ  BX, DI
 	SHRQ  $0x05, DI
-	MOVQ  AX, SI
+	MOVQ  CX, SI
 	ANDL  $0x0000001f, SI
 	MOVQ  $0x00000040, R8
 	SUBQ  SI, R8
 	DECQ  DI
 	JA    emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
-	LEAQ  -32(CX)(R8*1), SI
-	LEAQ  -32(AX)(R8*1), R9
+	LEAQ  -32(AX)(R8*1), SI
+	LEAQ  -32(CX)(R8*1), R9
 
 emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_big_loop_back:
 	MOVOU (SI), X4
@@ -14131,714 +14145,715 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_big_loop_back:
 	JNA   emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_big_loop_back
 
 emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32:
-	MOVOU -32(CX)(R8*1), X4
-	MOVOU -16(CX)(R8*1), X5
-	MOVOA X4, -32(AX)(R8*1)
-	MOVOA X5, -16(AX)(R8*1)
+	MOVOU -32(AX)(R8*1), X4
+	MOVOU -16(AX)(R8*1), X5
+	MOVOA X4, -32(CX)(R8*1)
+	MOVOA X5, -16(CX)(R8*1)
 	ADDQ  $0x20, R8
 	CMPQ  BX, R8
 	JAE   emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm10Blarge_forward_sse_loop_32
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(BX*1)
-	MOVOU X3, -16(AX)(BX*1)
-	MOVQ  DX, AX
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(BX*1)
+	MOVOU X3, -16(CX)(BX*1)
+	MOVQ  DX, CX
 
 emit_literal_done_emit_remainder_encodeSnappyBlockAsm10B:
-	MOVQ dst_base+0(FP), CX
-	SUBQ CX, AX
-	MOVQ AX, ret+48(FP)
+	MOVQ dst_base+0(FP), AX
+	SUBQ AX, CX
+	MOVQ CX, ret+56(FP)
 	RET
 
-// func encodeSnappyBlockAsm8B(dst []byte, src []byte) int
+// func encodeSnappyBlockAsm8B(dst []byte, src []byte, tmp *[1024]byte) int
 // Requires: BMI, SSE2
-TEXT ·encodeSnappyBlockAsm8B(SB), $1048-56
-	MOVQ dst_base+0(FP), AX
-	MOVQ $0x00000008, CX
-	LEAQ 24(SP), DX
+TEXT ·encodeSnappyBlockAsm8B(SB), $24-64
+	MOVQ tmp+48(FP), AX
+	MOVQ dst_base+0(FP), CX
+	MOVQ $0x00000008, DX
+	MOVQ AX, BX
 	PXOR X0, X0
 
 zero_loop_encodeSnappyBlockAsm8B:
-	MOVOU X0, (DX)
-	MOVOU X0, 16(DX)
-	MOVOU X0, 32(DX)
-	MOVOU X0, 48(DX)
-	MOVOU X0, 64(DX)
-	MOVOU X0, 80(DX)
-	MOVOU X0, 96(DX)
-	MOVOU X0, 112(DX)
-	ADDQ  $0x80, DX
-	DECQ  CX
+	MOVOU X0, (BX)
+	MOVOU X0, 16(BX)
+	MOVOU X0, 32(BX)
+	MOVOU X0, 48(BX)
+	MOVOU X0, 64(BX)
+	MOVOU X0, 80(BX)
+	MOVOU X0, 96(BX)
+	MOVOU X0, 112(BX)
+	ADDQ  $0x80, BX
+	DECQ  DX
 	JNZ   zero_loop_encodeSnappyBlockAsm8B
 	MOVL  $0x00000000, 12(SP)
-	MOVQ  src_len+32(FP), CX
-	LEAQ  -9(CX), DX
-	LEAQ  -8(CX), BX
-	MOVL  BX, 8(SP)
-	SHRQ  $0x05, CX
-	SUBL  CX, DX
-	LEAQ  (AX)(DX*1), DX
-	MOVQ  DX, (SP)
-	MOVL  $0x00000001, CX
-	MOVL  CX, 16(SP)
-	MOVQ  src_base+24(FP), DX
+	MOVQ  src_len+32(FP), DX
+	LEAQ  -9(DX), BX
+	LEAQ  -8(DX), SI
+	MOVL  SI, 8(SP)
+	SHRQ  $0x05, DX
+	SUBL  DX, BX
+	LEAQ  (CX)(BX*1), BX
+	MOVQ  BX, (SP)
+	MOVL  $0x00000001, DX
+	MOVL  DX, 16(SP)
+	MOVQ  src_base+24(FP), BX
 
 search_loop_encodeSnappyBlockAsm8B:
-	MOVL  CX, BX
-	SUBL  12(SP), BX
-	SHRL  $0x04, BX
-	LEAL  4(CX)(BX*1), BX
-	CMPL  BX, 8(SP)
+	MOVL  DX, SI
+	SUBL  12(SP), SI
+	SHRL  $0x04, SI
+	LEAL  4(DX)(SI*1), SI
+	CMPL  SI, 8(SP)
 	JAE   emit_remainder_encodeSnappyBlockAsm8B
-	MOVQ  (DX)(CX*1), SI
-	MOVL  BX, 20(SP)
-	MOVQ  $0x9e3779b1, R8
-	MOVQ  SI, R9
-	MOVQ  SI, R10
-	SHRQ  $0x08, R10
-	SHLQ  $0x20, R9
-	IMULQ R8, R9
-	SHRQ  $0x38, R9
+	MOVQ  (BX)(DX*1), DI
+	MOVL  SI, 20(SP)
+	MOVQ  $0x9e3779b1, R9
+	MOVQ  DI, R10
+	MOVQ  DI, R11
+	SHRQ  $0x08, R11
 	SHLQ  $0x20, R10
-	IMULQ R8, R10
+	IMULQ R9, R10
 	SHRQ  $0x38, R10
-	MOVL  24(SP)(R9*4), BX
-	MOVL  24(SP)(R10*4), DI
-	MOVL  CX, 24(SP)(R9*4)
-	LEAL  1(CX), R9
-	MOVL  R9, 24(SP)(R10*4)
-	MOVQ  SI, R9
-	SHRQ  $0x10, R9
-	SHLQ  $0x20, R9
-	IMULQ R8, R9
-	SHRQ  $0x38, R9
-	MOVL  CX, R8
-	SUBL  16(SP), R8
-	MOVL  1(DX)(R8*1), R10
-	MOVQ  SI, R8
-	SHRQ  $0x08, R8
-	CMPL  R8, R10
+	SHLQ  $0x20, R11
+	IMULQ R9, R11
+	SHRQ  $0x38, R11
+	MOVL  (AX)(R10*4), SI
+	MOVL  (AX)(R11*4), R8
+	MOVL  DX, (AX)(R10*4)
+	LEAL  1(DX), R10
+	MOVL  R10, (AX)(R11*4)
+	MOVQ  DI, R10
+	SHRQ  $0x10, R10
+	SHLQ  $0x20, R10
+	IMULQ R9, R10
+	SHRQ  $0x38, R10
+	MOVL  DX, R9
+	SUBL  16(SP), R9
+	MOVL  1(BX)(R9*1), R11
+	MOVQ  DI, R9
+	SHRQ  $0x08, R9
+	CMPL  R9, R11
 	JNE   no_repeat_found_encodeSnappyBlockAsm8B
-	LEAL  1(CX), SI
-	MOVL  12(SP), BX
-	MOVL  SI, DI
-	SUBL  16(SP), DI
+	LEAL  1(DX), DI
+	MOVL  12(SP), SI
+	MOVL  DI, R8
+	SUBL  16(SP), R8
 	JZ    repeat_extend_back_end_encodeSnappyBlockAsm8B
 
 repeat_extend_back_loop_encodeSnappyBlockAsm8B:
-	CMPL SI, BX
+	CMPL DI, SI
 	JBE  repeat_extend_back_end_encodeSnappyBlockAsm8B
-	MOVB -1(DX)(DI*1), R8
-	MOVB -1(DX)(SI*1), R9
-	CMPB R8, R9
+	MOVB -1(BX)(R8*1), R9
+	MOVB -1(BX)(DI*1), R10
+	CMPB R9, R10
 	JNE  repeat_extend_back_end_encodeSnappyBlockAsm8B
-	LEAL -1(SI), SI
-	DECL DI
+	LEAL -1(DI), DI
+	DECL R8
 	JNZ  repeat_extend_back_loop_encodeSnappyBlockAsm8B
 
 repeat_extend_back_end_encodeSnappyBlockAsm8B:
-	MOVL SI, BX
-	SUBL 12(SP), BX
-	LEAQ 3(AX)(BX*1), BX
-	CMPQ BX, (SP)
+	MOVL DI, SI
+	SUBL 12(SP), SI
+	LEAQ 3(CX)(SI*1), SI
+	CMPQ SI, (SP)
 	JB   repeat_dst_size_check_encodeSnappyBlockAsm8B
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 repeat_dst_size_check_encodeSnappyBlockAsm8B:
-	MOVL 12(SP), BX
-	CMPL BX, SI
+	MOVL 12(SP), SI
+	CMPL SI, DI
 	JEQ  emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B
-	MOVL SI, DI
-	MOVL SI, 12(SP)
-	LEAQ (DX)(BX*1), R8
-	SUBL BX, DI
-	LEAL -1(DI), BX
-	CMPL BX, $0x3c
+	MOVL DI, R8
+	MOVL DI, 12(SP)
+	LEAQ (BX)(SI*1), R9
+	SUBL SI, R8
+	LEAL -1(R8), SI
+	CMPL SI, $0x3c
 	JB   one_byte_repeat_emit_encodeSnappyBlockAsm8B
-	CMPL BX, $0x00000100
+	CMPL SI, $0x00000100
 	JB   two_bytes_repeat_emit_encodeSnappyBlockAsm8B
 	JB   three_bytes_repeat_emit_encodeSnappyBlockAsm8B
 
 three_bytes_repeat_emit_encodeSnappyBlockAsm8B:
-	MOVB $0xf4, (AX)
-	MOVW BX, 1(AX)
-	ADDQ $0x03, AX
+	MOVB $0xf4, (CX)
+	MOVW SI, 1(CX)
+	ADDQ $0x03, CX
 	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm8B
 
 two_bytes_repeat_emit_encodeSnappyBlockAsm8B:
-	MOVB $0xf0, (AX)
-	MOVB BL, 1(AX)
-	ADDQ $0x02, AX
-	CMPL BX, $0x40
+	MOVB $0xf0, (CX)
+	MOVB SI, 1(CX)
+	ADDQ $0x02, CX
+	CMPL SI, $0x40
 	JB   memmove_repeat_emit_encodeSnappyBlockAsm8B
 	JMP  memmove_long_repeat_emit_encodeSnappyBlockAsm8B
 
 one_byte_repeat_emit_encodeSnappyBlockAsm8B:
-	SHLB $0x02, BL
-	MOVB BL, (AX)
-	ADDQ $0x01, AX
+	SHLB $0x02, SI
+	MOVB SI, (CX)
+	ADDQ $0x01, CX
 
 memmove_repeat_emit_encodeSnappyBlockAsm8B:
-	LEAQ (AX)(DI*1), BX
+	LEAQ (CX)(R8*1), SI
 
 	// genMemMoveShort
-	CMPQ DI, $0x08
+	CMPQ R8, $0x08
 	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8
-	CMPQ DI, $0x10
+	CMPQ R8, $0x10
 	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8through16
-	CMPQ DI, $0x20
+	CMPQ R8, $0x20
 	JBE  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_17through32
 	JMP  emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_33through64
 
 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8:
-	MOVQ (R8), R9
-	MOVQ R9, (AX)
+	MOVQ (R9), R10
+	MOVQ R10, (CX)
 	JMP  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B
 
 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_8through16:
-	MOVQ (R8), R9
-	MOVQ -8(R8)(DI*1), R8
-	MOVQ R9, (AX)
-	MOVQ R8, -8(AX)(DI*1)
+	MOVQ (R9), R10
+	MOVQ -8(R9)(R8*1), R9
+	MOVQ R10, (CX)
+	MOVQ R9, -8(CX)(R8*1)
 	JMP  memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B
 
 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_17through32:
-	MOVOU (R8), X0
-	MOVOU -16(R8)(DI*1), X1
-	MOVOU X0, (AX)
-	MOVOU X1, -16(AX)(DI*1)
+	MOVOU (R9), X0
+	MOVOU -16(R9)(R8*1), X1
+	MOVOU X0, (CX)
+	MOVOU X1, -16(CX)(R8*1)
 	JMP   memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B
 
 emit_lit_memmove_repeat_emit_encodeSnappyBlockAsm8B_memmove_move_33through64:
-	MOVOU (R8), X0
-	MOVOU 16(R8), X1
-	MOVOU -32(R8)(DI*1), X2
-	MOVOU -16(R8)(DI*1), X3
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(DI*1)
-	MOVOU X3, -16(AX)(DI*1)
+	MOVOU (R9), X0
+	MOVOU 16(R9), X1
+	MOVOU -32(R9)(R8*1), X2
+	MOVOU -16(R9)(R8*1), X3
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R8*1)
+	MOVOU X3, -16(CX)(R8*1)
 
 memmove_end_copy_repeat_emit_encodeSnappyBlockAsm8B:
-	MOVQ BX, AX
+	MOVQ SI, CX
 	JMP  emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B
 
 memmove_long_repeat_emit_encodeSnappyBlockAsm8B:
-	LEAQ (AX)(DI*1), BX
+	LEAQ (CX)(R8*1), SI
 
 	// genMemMoveLong
-	MOVOU (R8), X0
-	MOVOU 16(R8), X1
-	MOVOU -32(R8)(DI*1), X2
-	MOVOU -16(R8)(DI*1), X3
-	MOVQ  DI, R10
-	SHRQ  $0x05, R10
-	MOVQ  AX, R9
-	ANDL  $0x0000001f, R9
-	MOVQ  $0x00000040, R11
-	SUBQ  R9, R11
-	DECQ  R10
+	MOVOU (R9), X0
+	MOVOU 16(R9), X1
+	MOVOU -32(R9)(R8*1), X2
+	MOVOU -16(R9)(R8*1), X3
+	MOVQ  R8, R11
+	SHRQ  $0x05, R11
+	MOVQ  CX, R10
+	ANDL  $0x0000001f, R10
+	MOVQ  $0x00000040, R12
+	SUBQ  R10, R12
+	DECQ  R11
 	JA    emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
-	LEAQ  -32(R8)(R11*1), R9
-	LEAQ  -32(AX)(R11*1), R12
+	LEAQ  -32(R9)(R12*1), R10
+	LEAQ  -32(CX)(R12*1), R13
 
 emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_big_loop_back:
-	MOVOU (R9), X4
-	MOVOU 16(R9), X5
-	MOVOA X4, (R12)
-	MOVOA X5, 16(R12)
+	MOVOU (R10), X4
+	MOVOU 16(R10), X5
+	MOVOA X4, (R13)
+	MOVOA X5, 16(R13)
+	ADDQ  $0x20, R13
+	ADDQ  $0x20, R10
 	ADDQ  $0x20, R12
-	ADDQ  $0x20, R9
-	ADDQ  $0x20, R11
-	DECQ  R10
+	DECQ  R11
 	JNA   emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_big_loop_back
 
 emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32:
-	MOVOU -32(R8)(R11*1), X4
-	MOVOU -16(R8)(R11*1), X5
-	MOVOA X4, -32(AX)(R11*1)
-	MOVOA X5, -16(AX)(R11*1)
-	ADDQ  $0x20, R11
-	CMPQ  DI, R11
+	MOVOU -32(R9)(R12*1), X4
+	MOVOU -16(R9)(R12*1), X5
+	MOVOA X4, -32(CX)(R12*1)
+	MOVOA X5, -16(CX)(R12*1)
+	ADDQ  $0x20, R12
+	CMPQ  R8, R12
 	JAE   emit_lit_memmove_long_repeat_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(DI*1)
-	MOVOU X3, -16(AX)(DI*1)
-	MOVQ  BX, AX
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R8*1)
+	MOVOU X3, -16(CX)(R8*1)
+	MOVQ  SI, CX
 
 emit_literal_done_repeat_emit_encodeSnappyBlockAsm8B:
-	ADDL $0x05, CX
-	MOVL CX, BX
-	SUBL 16(SP), BX
-	MOVQ src_len+32(FP), DI
-	SUBL CX, DI
-	LEAQ (DX)(CX*1), R8
-	LEAQ (DX)(BX*1), BX
+	ADDL $0x05, DX
+	MOVL DX, SI
+	SUBL 16(SP), SI
+	MOVQ src_len+32(FP), R8
+	SUBL DX, R8
+	LEAQ (BX)(DX*1), R9
+	LEAQ (BX)(SI*1), SI
 
 	// matchLen
-	XORL R10, R10
+	XORL R11, R11
 
 matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm8B:
-	CMPL DI, $0x10
+	CMPL R8, $0x10
 	JB   matchlen_match8_repeat_extend_encodeSnappyBlockAsm8B
-	MOVQ (R8)(R10*1), R9
-	MOVQ 8(R8)(R10*1), R11
-	XORQ (BX)(R10*1), R9
+	MOVQ (R9)(R11*1), R10
+	MOVQ 8(R9)(R11*1), R12
+	XORQ (SI)(R11*1), R10
 	JNZ  matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm8B
-	XORQ 8(BX)(R10*1), R11
+	XORQ 8(SI)(R11*1), R12
 	JNZ  matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm8B
-	LEAL -16(DI), DI
-	LEAL 16(R10), R10
+	LEAL -16(R8), R8
+	LEAL 16(R11), R11
 	JMP  matchlen_loopback_16_repeat_extend_encodeSnappyBlockAsm8B
 
 matchlen_bsf_16repeat_extend_encodeSnappyBlockAsm8B:
 #ifdef GOAMD64_v3
-	TZCNTQ R11, R11
+	TZCNTQ R12, R12
 
 #else
-	BSFQ R11, R11
+	BSFQ R12, R12
 
 #endif
-	SARQ $0x03, R11
-	LEAL 8(R10)(R11*1), R10
+	SARQ $0x03, R12
+	LEAL 8(R11)(R12*1), R11
 	JMP  repeat_extend_forward_end_encodeSnappyBlockAsm8B
 
 matchlen_match8_repeat_extend_encodeSnappyBlockAsm8B:
-	CMPL DI, $0x08
+	CMPL R8, $0x08
 	JB   matchlen_match4_repeat_extend_encodeSnappyBlockAsm8B
-	MOVQ (R8)(R10*1), R9
-	XORQ (BX)(R10*1), R9
+	MOVQ (R9)(R11*1), R10
+	XORQ (SI)(R11*1), R10
 	JNZ  matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm8B
-	LEAL -8(DI), DI
-	LEAL 8(R10), R10
+	LEAL -8(R8), R8
+	LEAL 8(R11), R11
 	JMP  matchlen_match4_repeat_extend_encodeSnappyBlockAsm8B
 
 matchlen_bsf_8_repeat_extend_encodeSnappyBlockAsm8B:
 #ifdef GOAMD64_v3
-	TZCNTQ R9, R9
+	TZCNTQ R10, R10
 
 #else
-	BSFQ R9, R9
+	BSFQ R10, R10
 
 #endif
-	SARQ $0x03, R9
-	LEAL (R10)(R9*1), R10
+	SARQ $0x03, R10
+	LEAL (R11)(R10*1), R11
 	JMP  repeat_extend_forward_end_encodeSnappyBlockAsm8B
 
 matchlen_match4_repeat_extend_encodeSnappyBlockAsm8B:
-	CMPL DI, $0x04
+	CMPL R8, $0x04
 	JB   matchlen_match2_repeat_extend_encodeSnappyBlockAsm8B
-	MOVL (R8)(R10*1), R9
-	CMPL (BX)(R10*1), R9
+	MOVL (R9)(R11*1), R10
+	CMPL (SI)(R11*1), R10
 	JNE  matchlen_match2_repeat_extend_encodeSnappyBlockAsm8B
-	LEAL -4(DI), DI
-	LEAL 4(R10), R10
+	LEAL -4(R8), R8
+	LEAL 4(R11), R11
 
 matchlen_match2_repeat_extend_encodeSnappyBlockAsm8B:
-	CMPL DI, $0x01
+	CMPL R8, $0x01
 	JE   matchlen_match1_repeat_extend_encodeSnappyBlockAsm8B
 	JB   repeat_extend_forward_end_encodeSnappyBlockAsm8B
-	MOVW (R8)(R10*1), R9
-	CMPW (BX)(R10*1), R9
+	MOVW (R9)(R11*1), R10
+	CMPW (SI)(R11*1), R10
 	JNE  matchlen_match1_repeat_extend_encodeSnappyBlockAsm8B
-	LEAL 2(R10), R10
-	SUBL $0x02, DI
+	LEAL 2(R11), R11
+	SUBL $0x02, R8
 	JZ   repeat_extend_forward_end_encodeSnappyBlockAsm8B
 
 matchlen_match1_repeat_extend_encodeSnappyBlockAsm8B:
-	MOVB (R8)(R10*1), R9
-	CMPB (BX)(R10*1), R9
+	MOVB (R9)(R11*1), R10
+	CMPB (SI)(R11*1), R10
 	JNE  repeat_extend_forward_end_encodeSnappyBlockAsm8B
-	LEAL 1(R10), R10
+	LEAL 1(R11), R11
 
 repeat_extend_forward_end_encodeSnappyBlockAsm8B:
-	ADDL R10, CX
-	MOVL CX, BX
-	SUBL SI, BX
-	MOVL 16(SP), SI
+	ADDL R11, DX
+	MOVL DX, SI
+	SUBL DI, SI
+	MOVL 16(SP), DI
 
 	// emitCopy
 two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm8B:
-	CMPL BX, $0x40
+	CMPL SI, $0x40
 	JBE  two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm8B
-	MOVB $0xee, (AX)
-	MOVW SI, 1(AX)
-	LEAL -60(BX), BX
-	ADDQ $0x03, AX
+	MOVB $0xee, (CX)
+	MOVW DI, 1(CX)
+	LEAL -60(SI), SI
+	ADDQ $0x03, CX
 	JMP  two_byte_offset_repeat_as_copy_encodeSnappyBlockAsm8B
 
 two_byte_offset_short_repeat_as_copy_encodeSnappyBlockAsm8B:
-	MOVL BX, DI
-	SHLL $0x02, DI
-	CMPL BX, $0x0c
+	MOVL SI, R8
+	SHLL $0x02, R8
+	CMPL SI, $0x0c
 	JAE  emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm8B
-	LEAL -15(DI), DI
-	MOVB SI, 1(AX)
-	SHRL $0x08, SI
-	SHLL $0x05, SI
-	ORL  SI, DI
-	MOVB DI, (AX)
-	ADDQ $0x02, AX
+	LEAL -15(R8), R8
+	MOVB DI, 1(CX)
+	SHRL $0x08, DI
+	SHLL $0x05, DI
+	ORL  DI, R8
+	MOVB R8, (CX)
+	ADDQ $0x02, CX
 	JMP  repeat_end_emit_encodeSnappyBlockAsm8B
 
 emit_copy_three_repeat_as_copy_encodeSnappyBlockAsm8B:
-	LEAL -2(DI), DI
-	MOVB DI, (AX)
-	MOVW SI, 1(AX)
-	ADDQ $0x03, AX
+	LEAL -2(R8), R8
+	MOVB R8, (CX)
+	MOVW DI, 1(CX)
+	ADDQ $0x03, CX
 
 repeat_end_emit_encodeSnappyBlockAsm8B:
-	MOVL CX, 12(SP)
+	MOVL DX, 12(SP)
 	JMP  search_loop_encodeSnappyBlockAsm8B
 
 no_repeat_found_encodeSnappyBlockAsm8B:
-	CMPL (DX)(BX*1), SI
+	CMPL (BX)(SI*1), DI
 	JEQ  candidate_match_encodeSnappyBlockAsm8B
-	SHRQ $0x08, SI
-	MOVL 24(SP)(R9*4), BX
-	LEAL 2(CX), R8
-	CMPL (DX)(DI*1), SI
+	SHRQ $0x08, DI
+	MOVL (AX)(R10*4), SI
+	LEAL 2(DX), R9
+	CMPL (BX)(R8*1), DI
 	JEQ  candidate2_match_encodeSnappyBlockAsm8B
-	MOVL R8, 24(SP)(R9*4)
-	SHRQ $0x08, SI
-	CMPL (DX)(BX*1), SI
+	MOVL R9, (AX)(R10*4)
+	SHRQ $0x08, DI
+	CMPL (BX)(SI*1), DI
 	JEQ  candidate3_match_encodeSnappyBlockAsm8B
-	MOVL 20(SP), CX
+	MOVL 20(SP), DX
 	JMP  search_loop_encodeSnappyBlockAsm8B
 
 candidate3_match_encodeSnappyBlockAsm8B:
-	ADDL $0x02, CX
+	ADDL $0x02, DX
 	JMP  candidate_match_encodeSnappyBlockAsm8B
 
 candidate2_match_encodeSnappyBlockAsm8B:
-	MOVL R8, 24(SP)(R9*4)
-	INCL CX
-	MOVL DI, BX
+	MOVL R9, (AX)(R10*4)
+	INCL DX
+	MOVL R8, SI
 
 candidate_match_encodeSnappyBlockAsm8B:
-	MOVL  12(SP), SI
-	TESTL BX, BX
+	MOVL  12(SP), DI
+	TESTL SI, SI
 	JZ    match_extend_back_end_encodeSnappyBlockAsm8B
 
 match_extend_back_loop_encodeSnappyBlockAsm8B:
-	CMPL CX, SI
+	CMPL DX, DI
 	JBE  match_extend_back_end_encodeSnappyBlockAsm8B
-	MOVB -1(DX)(BX*1), DI
-	MOVB -1(DX)(CX*1), R8
-	CMPB DI, R8
+	MOVB -1(BX)(SI*1), R8
+	MOVB -1(BX)(DX*1), R9
+	CMPB R8, R9
 	JNE  match_extend_back_end_encodeSnappyBlockAsm8B
-	LEAL -1(CX), CX
-	DECL BX
+	LEAL -1(DX), DX
+	DECL SI
 	JZ   match_extend_back_end_encodeSnappyBlockAsm8B
 	JMP  match_extend_back_loop_encodeSnappyBlockAsm8B
 
 match_extend_back_end_encodeSnappyBlockAsm8B:
-	MOVL CX, SI
-	SUBL 12(SP), SI
-	LEAQ 3(AX)(SI*1), SI
-	CMPQ SI, (SP)
+	MOVL DX, DI
+	SUBL 12(SP), DI
+	LEAQ 3(CX)(DI*1), DI
+	CMPQ DI, (SP)
 	JB   match_dst_size_check_encodeSnappyBlockAsm8B
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 match_dst_size_check_encodeSnappyBlockAsm8B:
-	MOVL CX, SI
-	MOVL 12(SP), DI
-	CMPL DI, SI
+	MOVL DX, DI
+	MOVL 12(SP), R8
+	CMPL R8, DI
 	JEQ  emit_literal_done_match_emit_encodeSnappyBlockAsm8B
-	MOVL SI, R8
-	MOVL SI, 12(SP)
-	LEAQ (DX)(DI*1), SI
-	SUBL DI, R8
-	LEAL -1(R8), DI
-	CMPL DI, $0x3c
+	MOVL DI, R9
+	MOVL DI, 12(SP)
+	LEAQ (BX)(R8*1), DI
+	SUBL R8, R9
+	LEAL -1(R9), R8
+	CMPL R8, $0x3c
 	JB   one_byte_match_emit_encodeSnappyBlockAsm8B
-	CMPL DI, $0x00000100
+	CMPL R8, $0x00000100
 	JB   two_bytes_match_emit_encodeSnappyBlockAsm8B
 	JB   three_bytes_match_emit_encodeSnappyBlockAsm8B
 
 three_bytes_match_emit_encodeSnappyBlockAsm8B:
-	MOVB $0xf4, (AX)
-	MOVW DI, 1(AX)
-	ADDQ $0x03, AX
+	MOVB $0xf4, (CX)
+	MOVW R8, 1(CX)
+	ADDQ $0x03, CX
 	JMP  memmove_long_match_emit_encodeSnappyBlockAsm8B
 
 two_bytes_match_emit_encodeSnappyBlockAsm8B:
-	MOVB $0xf0, (AX)
-	MOVB DI, 1(AX)
-	ADDQ $0x02, AX
-	CMPL DI, $0x40
+	MOVB $0xf0, (CX)
+	MOVB R8, 1(CX)
+	ADDQ $0x02, CX
+	CMPL R8, $0x40
 	JB   memmove_match_emit_encodeSnappyBlockAsm8B
 	JMP  memmove_long_match_emit_encodeSnappyBlockAsm8B
 
 one_byte_match_emit_encodeSnappyBlockAsm8B:
-	SHLB $0x02, DI
-	MOVB DI, (AX)
-	ADDQ $0x01, AX
+	SHLB $0x02, R8
+	MOVB R8, (CX)
+	ADDQ $0x01, CX
 
 memmove_match_emit_encodeSnappyBlockAsm8B:
-	LEAQ (AX)(R8*1), DI
+	LEAQ (CX)(R9*1), R8
 
 	// genMemMoveShort
-	CMPQ R8, $0x08
+	CMPQ R9, $0x08
 	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8
-	CMPQ R8, $0x10
+	CMPQ R9, $0x10
 	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8through16
-	CMPQ R8, $0x20
+	CMPQ R9, $0x20
 	JBE  emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_17through32
 	JMP  emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_33through64
 
 emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8:
-	MOVQ (SI), R9
-	MOVQ R9, (AX)
+	MOVQ (DI), R10
+	MOVQ R10, (CX)
 	JMP  memmove_end_copy_match_emit_encodeSnappyBlockAsm8B
 
 emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_8through16:
-	MOVQ (SI), R9
-	MOVQ -8(SI)(R8*1), SI
-	MOVQ R9, (AX)
-	MOVQ SI, -8(AX)(R8*1)
+	MOVQ (DI), R10
+	MOVQ -8(DI)(R9*1), DI
+	MOVQ R10, (CX)
+	MOVQ DI, -8(CX)(R9*1)
 	JMP  memmove_end_copy_match_emit_encodeSnappyBlockAsm8B
 
 emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_17through32:
-	MOVOU (SI), X0
-	MOVOU -16(SI)(R8*1), X1
-	MOVOU X0, (AX)
-	MOVOU X1, -16(AX)(R8*1)
+	MOVOU (DI), X0
+	MOVOU -16(DI)(R9*1), X1
+	MOVOU X0, (CX)
+	MOVOU X1, -16(CX)(R9*1)
 	JMP   memmove_end_copy_match_emit_encodeSnappyBlockAsm8B
 
 emit_lit_memmove_match_emit_encodeSnappyBlockAsm8B_memmove_move_33through64:
-	MOVOU (SI), X0
-	MOVOU 16(SI), X1
-	MOVOU -32(SI)(R8*1), X2
-	MOVOU -16(SI)(R8*1), X3
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(R8*1)
-	MOVOU X3, -16(AX)(R8*1)
+	MOVOU (DI), X0
+	MOVOU 16(DI), X1
+	MOVOU -32(DI)(R9*1), X2
+	MOVOU -16(DI)(R9*1), X3
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R9*1)
+	MOVOU X3, -16(CX)(R9*1)
 
 memmove_end_copy_match_emit_encodeSnappyBlockAsm8B:
-	MOVQ DI, AX
+	MOVQ R8, CX
 	JMP  emit_literal_done_match_emit_encodeSnappyBlockAsm8B
 
 memmove_long_match_emit_encodeSnappyBlockAsm8B:
-	LEAQ (AX)(R8*1), DI
+	LEAQ (CX)(R9*1), R8
 
 	// genMemMoveLong
-	MOVOU (SI), X0
-	MOVOU 16(SI), X1
-	MOVOU -32(SI)(R8*1), X2
-	MOVOU -16(SI)(R8*1), X3
-	MOVQ  R8, R10
-	SHRQ  $0x05, R10
-	MOVQ  AX, R9
-	ANDL  $0x0000001f, R9
-	MOVQ  $0x00000040, R11
-	SUBQ  R9, R11
-	DECQ  R10
+	MOVOU (DI), X0
+	MOVOU 16(DI), X1
+	MOVOU -32(DI)(R9*1), X2
+	MOVOU -16(DI)(R9*1), X3
+	MOVQ  R9, R11
+	SHRQ  $0x05, R11
+	MOVQ  CX, R10
+	ANDL  $0x0000001f, R10
+	MOVQ  $0x00000040, R12
+	SUBQ  R10, R12
+	DECQ  R11
 	JA    emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
-	LEAQ  -32(SI)(R11*1), R9
-	LEAQ  -32(AX)(R11*1), R12
+	LEAQ  -32(DI)(R12*1), R10
+	LEAQ  -32(CX)(R12*1), R13
 
 emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_big_loop_back:
-	MOVOU (R9), X4
-	MOVOU 16(R9), X5
-	MOVOA X4, (R12)
-	MOVOA X5, 16(R12)
+	MOVOU (R10), X4
+	MOVOU 16(R10), X5
+	MOVOA X4, (R13)
+	MOVOA X5, 16(R13)
+	ADDQ  $0x20, R13
+	ADDQ  $0x20, R10
 	ADDQ  $0x20, R12
-	ADDQ  $0x20, R9
-	ADDQ  $0x20, R11
-	DECQ  R10
+	DECQ  R11
 	JNA   emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_big_loop_back
 
 emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32:
-	MOVOU -32(SI)(R11*1), X4
-	MOVOU -16(SI)(R11*1), X5
-	MOVOA X4, -32(AX)(R11*1)
-	MOVOA X5, -16(AX)(R11*1)
-	ADDQ  $0x20, R11
-	CMPQ  R8, R11
+	MOVOU -32(DI)(R12*1), X4
+	MOVOU -16(DI)(R12*1), X5
+	MOVOA X4, -32(CX)(R12*1)
+	MOVOA X5, -16(CX)(R12*1)
+	ADDQ  $0x20, R12
+	CMPQ  R9, R12
 	JAE   emit_lit_memmove_long_match_emit_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(R8*1)
-	MOVOU X3, -16(AX)(R8*1)
-	MOVQ  DI, AX
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R9*1)
+	MOVOU X3, -16(CX)(R9*1)
+	MOVQ  R8, CX
 
 emit_literal_done_match_emit_encodeSnappyBlockAsm8B:
 match_nolit_loop_encodeSnappyBlockAsm8B:
-	MOVL CX, SI
-	SUBL BX, SI
-	MOVL SI, 16(SP)
-	ADDL $0x04, CX
-	ADDL $0x04, BX
-	MOVQ src_len+32(FP), SI
-	SUBL CX, SI
-	LEAQ (DX)(CX*1), DI
-	LEAQ (DX)(BX*1), BX
+	MOVL DX, DI
+	SUBL SI, DI
+	MOVL DI, 16(SP)
+	ADDL $0x04, DX
+	ADDL $0x04, SI
+	MOVQ src_len+32(FP), DI
+	SUBL DX, DI
+	LEAQ (BX)(DX*1), R8
+	LEAQ (BX)(SI*1), SI
 
 	// matchLen
-	XORL R9, R9
+	XORL R10, R10
 
 matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm8B:
-	CMPL SI, $0x10
+	CMPL DI, $0x10
 	JB   matchlen_match8_match_nolit_encodeSnappyBlockAsm8B
-	MOVQ (DI)(R9*1), R8
-	MOVQ 8(DI)(R9*1), R10
-	XORQ (BX)(R9*1), R8
+	MOVQ (R8)(R10*1), R9
+	MOVQ 8(R8)(R10*1), R11
+	XORQ (SI)(R10*1), R9
 	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm8B
-	XORQ 8(BX)(R9*1), R10
+	XORQ 8(SI)(R10*1), R11
 	JNZ  matchlen_bsf_16match_nolit_encodeSnappyBlockAsm8B
-	LEAL -16(SI), SI
-	LEAL 16(R9), R9
+	LEAL -16(DI), DI
+	LEAL 16(R10), R10
 	JMP  matchlen_loopback_16_match_nolit_encodeSnappyBlockAsm8B
 
 matchlen_bsf_16match_nolit_encodeSnappyBlockAsm8B:
 #ifdef GOAMD64_v3
-	TZCNTQ R10, R10
+	TZCNTQ R11, R11
 
 #else
-	BSFQ R10, R10
+	BSFQ R11, R11
 
 #endif
-	SARQ $0x03, R10
-	LEAL 8(R9)(R10*1), R9
+	SARQ $0x03, R11
+	LEAL 8(R10)(R11*1), R10
 	JMP  match_nolit_end_encodeSnappyBlockAsm8B
 
 matchlen_match8_match_nolit_encodeSnappyBlockAsm8B:
-	CMPL SI, $0x08
+	CMPL DI, $0x08
 	JB   matchlen_match4_match_nolit_encodeSnappyBlockAsm8B
-	MOVQ (DI)(R9*1), R8
-	XORQ (BX)(R9*1), R8
+	MOVQ (R8)(R10*1), R9
+	XORQ (SI)(R10*1), R9
 	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm8B
-	LEAL -8(SI), SI
-	LEAL 8(R9), R9
+	LEAL -8(DI), DI
+	LEAL 8(R10), R10
 	JMP  matchlen_match4_match_nolit_encodeSnappyBlockAsm8B
 
 matchlen_bsf_8_match_nolit_encodeSnappyBlockAsm8B:
 #ifdef GOAMD64_v3
-	TZCNTQ R8, R8
+	TZCNTQ R9, R9
 
 #else
-	BSFQ R8, R8
+	BSFQ R9, R9
 
 #endif
-	SARQ $0x03, R8
-	LEAL (R9)(R8*1), R9
+	SARQ $0x03, R9
+	LEAL (R10)(R9*1), R10
 	JMP  match_nolit_end_encodeSnappyBlockAsm8B
 
 matchlen_match4_match_nolit_encodeSnappyBlockAsm8B:
-	CMPL SI, $0x04
+	CMPL DI, $0x04
 	JB   matchlen_match2_match_nolit_encodeSnappyBlockAsm8B
-	MOVL (DI)(R9*1), R8
-	CMPL (BX)(R9*1), R8
+	MOVL (R8)(R10*1), R9
+	CMPL (SI)(R10*1), R9
 	JNE  matchlen_match2_match_nolit_encodeSnappyBlockAsm8B
-	LEAL -4(SI), SI
-	LEAL 4(R9), R9
+	LEAL -4(DI), DI
+	LEAL 4(R10), R10
 
 matchlen_match2_match_nolit_encodeSnappyBlockAsm8B:
-	CMPL SI, $0x01
+	CMPL DI, $0x01
 	JE   matchlen_match1_match_nolit_encodeSnappyBlockAsm8B
 	JB   match_nolit_end_encodeSnappyBlockAsm8B
-	MOVW (DI)(R9*1), R8
-	CMPW (BX)(R9*1), R8
+	MOVW (R8)(R10*1), R9
+	CMPW (SI)(R10*1), R9
 	JNE  matchlen_match1_match_nolit_encodeSnappyBlockAsm8B
-	LEAL 2(R9), R9
-	SUBL $0x02, SI
+	LEAL 2(R10), R10
+	SUBL $0x02, DI
 	JZ   match_nolit_end_encodeSnappyBlockAsm8B
 
 matchlen_match1_match_nolit_encodeSnappyBlockAsm8B:
-	MOVB (DI)(R9*1), R8
-	CMPB (BX)(R9*1), R8
+	MOVB (R8)(R10*1), R9
+	CMPB (SI)(R10*1), R9
 	JNE  match_nolit_end_encodeSnappyBlockAsm8B
-	LEAL 1(R9), R9
+	LEAL 1(R10), R10
 
 match_nolit_end_encodeSnappyBlockAsm8B:
-	ADDL R9, CX
-	MOVL 16(SP), BX
-	ADDL $0x04, R9
-	MOVL CX, 12(SP)
+	ADDL R10, DX
+	MOVL 16(SP), SI
+	ADDL $0x04, R10
+	MOVL DX, 12(SP)
 
 	// emitCopy
 two_byte_offset_match_nolit_encodeSnappyBlockAsm8B:
-	CMPL R9, $0x40
+	CMPL R10, $0x40
 	JBE  two_byte_offset_short_match_nolit_encodeSnappyBlockAsm8B
-	MOVB $0xee, (AX)
-	MOVW BX, 1(AX)
-	LEAL -60(R9), R9
-	ADDQ $0x03, AX
+	MOVB $0xee, (CX)
+	MOVW SI, 1(CX)
+	LEAL -60(R10), R10
+	ADDQ $0x03, CX
 	JMP  two_byte_offset_match_nolit_encodeSnappyBlockAsm8B
 
 two_byte_offset_short_match_nolit_encodeSnappyBlockAsm8B:
-	MOVL R9, SI
-	SHLL $0x02, SI
-	CMPL R9, $0x0c
+	MOVL R10, DI
+	SHLL $0x02, DI
+	CMPL R10, $0x0c
 	JAE  emit_copy_three_match_nolit_encodeSnappyBlockAsm8B
-	LEAL -15(SI), SI
-	MOVB BL, 1(AX)
-	SHRL $0x08, BX
-	SHLL $0x05, BX
-	ORL  BX, SI
-	MOVB SI, (AX)
-	ADDQ $0x02, AX
+	LEAL -15(DI), DI
+	MOVB SI, 1(CX)
+	SHRL $0x08, SI
+	SHLL $0x05, SI
+	ORL  SI, DI
+	MOVB DI, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeSnappyBlockAsm8B
 
 emit_copy_three_match_nolit_encodeSnappyBlockAsm8B:
-	LEAL -2(SI), SI
-	MOVB SI, (AX)
-	MOVW BX, 1(AX)
-	ADDQ $0x03, AX
+	LEAL -2(DI), DI
+	MOVB DI, (CX)
+	MOVW SI, 1(CX)
+	ADDQ $0x03, CX
 
 match_nolit_emitcopy_end_encodeSnappyBlockAsm8B:
-	CMPL CX, 8(SP)
+	CMPL DX, 8(SP)
 	JAE  emit_remainder_encodeSnappyBlockAsm8B
-	MOVQ -2(DX)(CX*1), SI
-	CMPQ AX, (SP)
+	MOVQ -2(BX)(DX*1), DI
+	CMPQ CX, (SP)
 	JB   match_nolit_dst_ok_encodeSnappyBlockAsm8B
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 match_nolit_dst_ok_encodeSnappyBlockAsm8B:
-	MOVQ  $0x9e3779b1, R8
-	MOVQ  SI, DI
-	SHRQ  $0x10, SI
-	MOVQ  SI, BX
-	SHLQ  $0x20, DI
-	IMULQ R8, DI
-	SHRQ  $0x38, DI
-	SHLQ  $0x20, BX
-	IMULQ R8, BX
-	SHRQ  $0x38, BX
-	LEAL  -2(CX), R8
-	LEAQ  24(SP)(BX*4), R9
-	MOVL  (R9), BX
-	MOVL  R8, 24(SP)(DI*4)
-	MOVL  CX, (R9)
-	CMPL  (DX)(BX*1), SI
+	MOVQ  $0x9e3779b1, R9
+	MOVQ  DI, R8
+	SHRQ  $0x10, DI
+	MOVQ  DI, SI
+	SHLQ  $0x20, R8
+	IMULQ R9, R8
+	SHRQ  $0x38, R8
+	SHLQ  $0x20, SI
+	IMULQ R9, SI
+	SHRQ  $0x38, SI
+	LEAL  -2(DX), R9
+	LEAQ  (AX)(SI*4), R10
+	MOVL  (R10), SI
+	MOVL  R9, (AX)(R8*4)
+	MOVL  DX, (R10)
+	CMPL  (BX)(SI*1), DI
 	JEQ   match_nolit_loop_encodeSnappyBlockAsm8B
-	INCL  CX
+	INCL  DX
 	JMP   search_loop_encodeSnappyBlockAsm8B
 
 emit_remainder_encodeSnappyBlockAsm8B:
-	MOVQ src_len+32(FP), CX
-	SUBL 12(SP), CX
-	LEAQ 3(AX)(CX*1), CX
-	CMPQ CX, (SP)
+	MOVQ src_len+32(FP), AX
+	SUBL 12(SP), AX
+	LEAQ 3(CX)(AX*1), AX
+	CMPQ AX, (SP)
 	JB   emit_remainder_ok_encodeSnappyBlockAsm8B
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 emit_remainder_ok_encodeSnappyBlockAsm8B:
-	MOVQ src_len+32(FP), CX
-	MOVL 12(SP), BX
-	CMPL BX, CX
+	MOVQ src_len+32(FP), AX
+	MOVL 12(SP), DX
+	CMPL DX, AX
 	JEQ  emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B
-	MOVL CX, SI
-	MOVL CX, 12(SP)
-	LEAQ (DX)(BX*1), CX
-	SUBL BX, SI
+	MOVL AX, SI
+	MOVL AX, 12(SP)
+	LEAQ (BX)(DX*1), AX
+	SUBL DX, SI
 	LEAL -1(SI), DX
 	CMPL DX, $0x3c
 	JB   one_byte_emit_remainder_encodeSnappyBlockAsm8B
@@ -14847,26 +14862,26 @@ emit_remainder_ok_encodeSnappyBlockAsm8B:
 	JB   three_bytes_emit_remainder_encodeSnappyBlockAsm8B
 
 three_bytes_emit_remainder_encodeSnappyBlockAsm8B:
-	MOVB $0xf4, (AX)
-	MOVW DX, 1(AX)
-	ADDQ $0x03, AX
+	MOVB $0xf4, (CX)
+	MOVW DX, 1(CX)
+	ADDQ $0x03, CX
 	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm8B
 
 two_bytes_emit_remainder_encodeSnappyBlockAsm8B:
-	MOVB $0xf0, (AX)
-	MOVB DL, 1(AX)
-	ADDQ $0x02, AX
+	MOVB $0xf0, (CX)
+	MOVB DL, 1(CX)
+	ADDQ $0x02, CX
 	CMPL DX, $0x40
 	JB   memmove_emit_remainder_encodeSnappyBlockAsm8B
 	JMP  memmove_long_emit_remainder_encodeSnappyBlockAsm8B
 
 one_byte_emit_remainder_encodeSnappyBlockAsm8B:
 	SHLB $0x02, DL
-	MOVB DL, (AX)
-	ADDQ $0x01, AX
+	MOVB DL, (CX)
+	ADDQ $0x01, CX
 
 memmove_emit_remainder_encodeSnappyBlockAsm8B:
-	LEAQ (AX)(SI*1), DX
+	LEAQ (CX)(SI*1), DX
 	MOVL SI, BX
 
 	// genMemMoveShort
@@ -14882,73 +14897,73 @@ memmove_emit_remainder_encodeSnappyBlockAsm8B:
 	JMP  emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_33through64
 
 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_1or2:
-	MOVB (CX), SI
-	MOVB -1(CX)(BX*1), CL
-	MOVB SI, (AX)
-	MOVB CL, -1(AX)(BX*1)
+	MOVB (AX), SI
+	MOVB -1(AX)(BX*1), AL
+	MOVB SI, (CX)
+	MOVB AL, -1(CX)(BX*1)
 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B
 
 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_3:
-	MOVW (CX), SI
-	MOVB 2(CX), CL
-	MOVW SI, (AX)
-	MOVB CL, 2(AX)
+	MOVW (AX), SI
+	MOVB 2(AX), AL
+	MOVW SI, (CX)
+	MOVB AL, 2(CX)
 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B
 
 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_4through7:
-	MOVL (CX), SI
-	MOVL -4(CX)(BX*1), CX
-	MOVL SI, (AX)
-	MOVL CX, -4(AX)(BX*1)
+	MOVL (AX), SI
+	MOVL -4(AX)(BX*1), AX
+	MOVL SI, (CX)
+	MOVL AX, -4(CX)(BX*1)
 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B
 
 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_8through16:
-	MOVQ (CX), SI
-	MOVQ -8(CX)(BX*1), CX
-	MOVQ SI, (AX)
-	MOVQ CX, -8(AX)(BX*1)
+	MOVQ (AX), SI
+	MOVQ -8(AX)(BX*1), AX
+	MOVQ SI, (CX)
+	MOVQ AX, -8(CX)(BX*1)
 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B
 
 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_17through32:
-	MOVOU (CX), X0
-	MOVOU -16(CX)(BX*1), X1
-	MOVOU X0, (AX)
-	MOVOU X1, -16(AX)(BX*1)
+	MOVOU (AX), X0
+	MOVOU -16(AX)(BX*1), X1
+	MOVOU X0, (CX)
+	MOVOU X1, -16(CX)(BX*1)
 	JMP   memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B
 
 emit_lit_memmove_emit_remainder_encodeSnappyBlockAsm8B_memmove_move_33through64:
-	MOVOU (CX), X0
-	MOVOU 16(CX), X1
-	MOVOU -32(CX)(BX*1), X2
-	MOVOU -16(CX)(BX*1), X3
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(BX*1)
-	MOVOU X3, -16(AX)(BX*1)
+	MOVOU (AX), X0
+	MOVOU 16(AX), X1
+	MOVOU -32(AX)(BX*1), X2
+	MOVOU -16(AX)(BX*1), X3
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(BX*1)
+	MOVOU X3, -16(CX)(BX*1)
 
 memmove_end_copy_emit_remainder_encodeSnappyBlockAsm8B:
-	MOVQ DX, AX
+	MOVQ DX, CX
 	JMP  emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B
 
 memmove_long_emit_remainder_encodeSnappyBlockAsm8B:
-	LEAQ (AX)(SI*1), DX
+	LEAQ (CX)(SI*1), DX
 	MOVL SI, BX
 
 	// genMemMoveLong
-	MOVOU (CX), X0
-	MOVOU 16(CX), X1
-	MOVOU -32(CX)(BX*1), X2
-	MOVOU -16(CX)(BX*1), X3
+	MOVOU (AX), X0
+	MOVOU 16(AX), X1
+	MOVOU -32(AX)(BX*1), X2
+	MOVOU -16(AX)(BX*1), X3
 	MOVQ  BX, DI
 	SHRQ  $0x05, DI
-	MOVQ  AX, SI
+	MOVQ  CX, SI
 	ANDL  $0x0000001f, SI
 	MOVQ  $0x00000040, R8
 	SUBQ  SI, R8
 	DECQ  DI
 	JA    emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
-	LEAQ  -32(CX)(R8*1), SI
-	LEAQ  -32(AX)(R8*1), R9
+	LEAQ  -32(AX)(R8*1), SI
+	LEAQ  -32(CX)(R8*1), R9
 
 emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_big_loop_back:
 	MOVOU (SI), X4
@@ -14962,520 +14977,521 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_big_loop_back:
 	JNA   emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_big_loop_back
 
 emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32:
-	MOVOU -32(CX)(R8*1), X4
-	MOVOU -16(CX)(R8*1), X5
-	MOVOA X4, -32(AX)(R8*1)
-	MOVOA X5, -16(AX)(R8*1)
+	MOVOU -32(AX)(R8*1), X4
+	MOVOU -16(AX)(R8*1), X5
+	MOVOA X4, -32(CX)(R8*1)
+	MOVOA X5, -16(CX)(R8*1)
 	ADDQ  $0x20, R8
 	CMPQ  BX, R8
 	JAE   emit_lit_memmove_long_emit_remainder_encodeSnappyBlockAsm8Blarge_forward_sse_loop_32
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(BX*1)
-	MOVOU X3, -16(AX)(BX*1)
-	MOVQ  DX, AX
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(BX*1)
+	MOVOU X3, -16(CX)(BX*1)
+	MOVQ  DX, CX
 
 emit_literal_done_emit_remainder_encodeSnappyBlockAsm8B:
-	MOVQ dst_base+0(FP), CX
-	SUBQ CX, AX
-	MOVQ AX, ret+48(FP)
+	MOVQ dst_base+0(FP), AX
+	SUBQ AX, CX
+	MOVQ CX, ret+56(FP)
 	RET
 
-// func encodeSnappyBetterBlockAsm(dst []byte, src []byte) int
+// func encodeSnappyBetterBlockAsm(dst []byte, src []byte, tmp *[589824]byte) int
 // Requires: BMI, SSE2
-TEXT ·encodeSnappyBetterBlockAsm(SB), $589848-56
-	MOVQ dst_base+0(FP), AX
-	MOVQ $0x00001200, CX
-	LEAQ 24(SP), DX
+TEXT ·encodeSnappyBetterBlockAsm(SB), $24-64
+	MOVQ tmp+48(FP), AX
+	MOVQ dst_base+0(FP), CX
+	MOVQ $0x00001200, DX
+	MOVQ AX, BX
 	PXOR X0, X0
 
 zero_loop_encodeSnappyBetterBlockAsm:
-	MOVOU X0, (DX)
-	MOVOU X0, 16(DX)
-	MOVOU X0, 32(DX)
-	MOVOU X0, 48(DX)
-	MOVOU X0, 64(DX)
-	MOVOU X0, 80(DX)
-	MOVOU X0, 96(DX)
-	MOVOU X0, 112(DX)
-	ADDQ  $0x80, DX
-	DECQ  CX
+	MOVOU X0, (BX)
+	MOVOU X0, 16(BX)
+	MOVOU X0, 32(BX)
+	MOVOU X0, 48(BX)
+	MOVOU X0, 64(BX)
+	MOVOU X0, 80(BX)
+	MOVOU X0, 96(BX)
+	MOVOU X0, 112(BX)
+	ADDQ  $0x80, BX
+	DECQ  DX
 	JNZ   zero_loop_encodeSnappyBetterBlockAsm
 	MOVL  $0x00000000, 12(SP)
-	MOVQ  src_len+32(FP), CX
-	LEAQ  -9(CX), DX
-	LEAQ  -8(CX), BX
-	MOVL  BX, 8(SP)
-	SHRQ  $0x05, CX
-	SUBL  CX, DX
-	LEAQ  (AX)(DX*1), DX
-	MOVQ  DX, (SP)
-	MOVL  $0x00000001, CX
+	MOVQ  src_len+32(FP), DX
+	LEAQ  -9(DX), BX
+	LEAQ  -8(DX), SI
+	MOVL  SI, 8(SP)
+	SHRQ  $0x05, DX
+	SUBL  DX, BX
+	LEAQ  (CX)(BX*1), BX
+	MOVQ  BX, (SP)
+	MOVL  $0x00000001, DX
 	MOVL  $0x00000000, 16(SP)
-	MOVQ  src_base+24(FP), DX
+	MOVQ  src_base+24(FP), BX
 
 search_loop_encodeSnappyBetterBlockAsm:
-	MOVL CX, BX
-	SUBL 12(SP), BX
-	SHRL $0x07, BX
-	CMPL BX, $0x63
+	MOVL DX, SI
+	SUBL 12(SP), SI
+	SHRL $0x07, SI
+	CMPL SI, $0x63
 	JBE  check_maxskip_ok_encodeSnappyBetterBlockAsm
-	LEAL 100(CX), BX
+	LEAL 100(DX), SI
 	JMP  check_maxskip_cont_encodeSnappyBetterBlockAsm
 
 check_maxskip_ok_encodeSnappyBetterBlockAsm:
-	LEAL 1(CX)(BX*1), BX
+	LEAL 1(DX)(SI*1), SI
 
 check_maxskip_cont_encodeSnappyBetterBlockAsm:
-	CMPL  BX, 8(SP)
+	CMPL  SI, 8(SP)
 	JAE   emit_remainder_encodeSnappyBetterBlockAsm
-	MOVQ  (DX)(CX*1), SI
-	MOVL  BX, 20(SP)
-	MOVQ  $0x00cf1bbcdcbfa563, R8
-	MOVQ  $0x9e3779b1, BX
-	MOVQ  SI, R9
-	MOVQ  SI, R10
-	SHLQ  $0x08, R9
-	IMULQ R8, R9
-	SHRQ  $0x2f, R9
-	SHLQ  $0x20, R10
-	IMULQ BX, R10
-	SHRQ  $0x32, R10
-	MOVL  24(SP)(R9*4), BX
-	MOVL  524312(SP)(R10*4), DI
-	MOVL  CX, 24(SP)(R9*4)
-	MOVL  CX, 524312(SP)(R10*4)
-	MOVQ  (DX)(BX*1), R9
-	MOVQ  (DX)(DI*1), R10
-	CMPQ  R9, SI
+	MOVQ  (BX)(DX*1), DI
+	MOVL  SI, 20(SP)
+	MOVQ  $0x00cf1bbcdcbfa563, R9
+	MOVQ  $0x9e3779b1, SI
+	MOVQ  DI, R10
+	MOVQ  DI, R11
+	SHLQ  $0x08, R10
+	IMULQ R9, R10
+	SHRQ  $0x2f, R10
+	SHLQ  $0x20, R11
+	IMULQ SI, R11
+	SHRQ  $0x32, R11
+	MOVL  (AX)(R10*4), SI
+	MOVL  524288(AX)(R11*4), R8
+	MOVL  DX, (AX)(R10*4)
+	MOVL  DX, 524288(AX)(R11*4)
+	MOVQ  (BX)(SI*1), R10
+	MOVQ  (BX)(R8*1), R11
+	CMPQ  R10, DI
 	JEQ   candidate_match_encodeSnappyBetterBlockAsm
-	CMPQ  R10, SI
+	CMPQ  R11, DI
 	JNE   no_short_found_encodeSnappyBetterBlockAsm
-	MOVL  DI, BX
+	MOVL  R8, SI
 	JMP   candidate_match_encodeSnappyBetterBlockAsm
 
 no_short_found_encodeSnappyBetterBlockAsm:
-	CMPL R9, SI
+	CMPL R10, DI
 	JEQ  candidate_match_encodeSnappyBetterBlockAsm
-	CMPL R10, SI
+	CMPL R11, DI
 	JEQ  candidateS_match_encodeSnappyBetterBlockAsm
-	MOVL 20(SP), CX
+	MOVL 20(SP), DX
 	JMP  search_loop_encodeSnappyBetterBlockAsm
 
 candidateS_match_encodeSnappyBetterBlockAsm:
-	SHRQ  $0x08, SI
-	MOVQ  SI, R9
-	SHLQ  $0x08, R9
-	IMULQ R8, R9
-	SHRQ  $0x2f, R9
-	MOVL  24(SP)(R9*4), BX
-	INCL  CX
-	MOVL  CX, 24(SP)(R9*4)
-	CMPL  (DX)(BX*1), SI
+	SHRQ  $0x08, DI
+	MOVQ  DI, R10
+	SHLQ  $0x08, R10
+	IMULQ R9, R10
+	SHRQ  $0x2f, R10
+	MOVL  (AX)(R10*4), SI
+	INCL  DX
+	MOVL  DX, (AX)(R10*4)
+	CMPL  (BX)(SI*1), DI
 	JEQ   candidate_match_encodeSnappyBetterBlockAsm
-	DECL  CX
-	MOVL  DI, BX
+	DECL  DX
+	MOVL  R8, SI
 
 candidate_match_encodeSnappyBetterBlockAsm:
-	MOVL  12(SP), SI
-	TESTL BX, BX
+	MOVL  12(SP), DI
+	TESTL SI, SI
 	JZ    match_extend_back_end_encodeSnappyBetterBlockAsm
 
 match_extend_back_loop_encodeSnappyBetterBlockAsm:
-	CMPL CX, SI
+	CMPL DX, DI
 	JBE  match_extend_back_end_encodeSnappyBetterBlockAsm
-	MOVB -1(DX)(BX*1), DI
-	MOVB -1(DX)(CX*1), R8
-	CMPB DI, R8
+	MOVB -1(BX)(SI*1), R8
+	MOVB -1(BX)(DX*1), R9
+	CMPB R8, R9
 	JNE  match_extend_back_end_encodeSnappyBetterBlockAsm
-	LEAL -1(CX), CX
-	DECL BX
+	LEAL -1(DX), DX
+	DECL SI
 	JZ   match_extend_back_end_encodeSnappyBetterBlockAsm
 	JMP  match_extend_back_loop_encodeSnappyBetterBlockAsm
 
 match_extend_back_end_encodeSnappyBetterBlockAsm:
-	MOVL CX, SI
-	SUBL 12(SP), SI
-	LEAQ 5(AX)(SI*1), SI
-	CMPQ SI, (SP)
+	MOVL DX, DI
+	SUBL 12(SP), DI
+	LEAQ 5(CX)(DI*1), DI
+	CMPQ DI, (SP)
 	JB   match_dst_size_check_encodeSnappyBetterBlockAsm
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 match_dst_size_check_encodeSnappyBetterBlockAsm:
-	MOVL CX, SI
-	ADDL $0x04, CX
-	ADDL $0x04, BX
-	MOVQ src_len+32(FP), DI
-	SUBL CX, DI
-	LEAQ (DX)(CX*1), R8
-	LEAQ (DX)(BX*1), R9
+	MOVL DX, DI
+	ADDL $0x04, DX
+	ADDL $0x04, SI
+	MOVQ src_len+32(FP), R8
+	SUBL DX, R8
+	LEAQ (BX)(DX*1), R9
+	LEAQ (BX)(SI*1), R10
 
 	// matchLen
-	XORL R11, R11
+	XORL R12, R12
 
 matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm:
-	CMPL DI, $0x10
+	CMPL R8, $0x10
 	JB   matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm
-	MOVQ (R8)(R11*1), R10
-	MOVQ 8(R8)(R11*1), R12
-	XORQ (R9)(R11*1), R10
+	MOVQ (R9)(R12*1), R11
+	MOVQ 8(R9)(R12*1), R13
+	XORQ (R10)(R12*1), R11
 	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm
-	XORQ 8(R9)(R11*1), R12
+	XORQ 8(R10)(R12*1), R13
 	JNZ  matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm
-	LEAL -16(DI), DI
-	LEAL 16(R11), R11
+	LEAL -16(R8), R8
+	LEAL 16(R12), R12
 	JMP  matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm
 
 matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm:
 #ifdef GOAMD64_v3
-	TZCNTQ R12, R12
+	TZCNTQ R13, R13
 
 #else
-	BSFQ R12, R12
+	BSFQ R13, R13
 
 #endif
-	SARQ $0x03, R12
-	LEAL 8(R11)(R12*1), R11
+	SARQ $0x03, R13
+	LEAL 8(R12)(R13*1), R12
 	JMP  match_nolit_end_encodeSnappyBetterBlockAsm
 
 matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm:
-	CMPL DI, $0x08
+	CMPL R8, $0x08
 	JB   matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm
-	MOVQ (R8)(R11*1), R10
-	XORQ (R9)(R11*1), R10
+	MOVQ (R9)(R12*1), R11
+	XORQ (R10)(R12*1), R11
 	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm
-	LEAL -8(DI), DI
-	LEAL 8(R11), R11
+	LEAL -8(R8), R8
+	LEAL 8(R12), R12
 	JMP  matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm
 
 matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm:
 #ifdef GOAMD64_v3
-	TZCNTQ R10, R10
+	TZCNTQ R11, R11
 
 #else
-	BSFQ R10, R10
+	BSFQ R11, R11
 
 #endif
-	SARQ $0x03, R10
-	LEAL (R11)(R10*1), R11
+	SARQ $0x03, R11
+	LEAL (R12)(R11*1), R12
 	JMP  match_nolit_end_encodeSnappyBetterBlockAsm
 
 matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm:
-	CMPL DI, $0x04
+	CMPL R8, $0x04
 	JB   matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm
-	MOVL (R8)(R11*1), R10
-	CMPL (R9)(R11*1), R10
+	MOVL (R9)(R12*1), R11
+	CMPL (R10)(R12*1), R11
 	JNE  matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm
-	LEAL -4(DI), DI
-	LEAL 4(R11), R11
+	LEAL -4(R8), R8
+	LEAL 4(R12), R12
 
 matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm:
-	CMPL DI, $0x01
+	CMPL R8, $0x01
 	JE   matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm
 	JB   match_nolit_end_encodeSnappyBetterBlockAsm
-	MOVW (R8)(R11*1), R10
-	CMPW (R9)(R11*1), R10
+	MOVW (R9)(R12*1), R11
+	CMPW (R10)(R12*1), R11
 	JNE  matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm
-	LEAL 2(R11), R11
-	SUBL $0x02, DI
+	LEAL 2(R12), R12
+	SUBL $0x02, R8
 	JZ   match_nolit_end_encodeSnappyBetterBlockAsm
 
 matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm:
-	MOVB (R8)(R11*1), R10
-	CMPB (R9)(R11*1), R10
+	MOVB (R9)(R12*1), R11
+	CMPB (R10)(R12*1), R11
 	JNE  match_nolit_end_encodeSnappyBetterBlockAsm
-	LEAL 1(R11), R11
+	LEAL 1(R12), R12
 
 match_nolit_end_encodeSnappyBetterBlockAsm:
-	MOVL CX, DI
-	SUBL BX, DI
+	MOVL DX, R8
+	SUBL SI, R8
 
 	// Check if repeat
-	CMPL R11, $0x01
+	CMPL R12, $0x01
 	JA   match_length_ok_encodeSnappyBetterBlockAsm
-	CMPL DI, $0x0000ffff
+	CMPL R8, $0x0000ffff
 	JBE  match_length_ok_encodeSnappyBetterBlockAsm
-	MOVL 20(SP), CX
-	INCL CX
+	MOVL 20(SP), DX
+	INCL DX
 	JMP  search_loop_encodeSnappyBetterBlockAsm
 
 match_length_ok_encodeSnappyBetterBlockAsm:
-	MOVL DI, 16(SP)
-	MOVL 12(SP), BX
-	CMPL BX, SI
+	MOVL R8, 16(SP)
+	MOVL 12(SP), SI
+	CMPL SI, DI
 	JEQ  emit_literal_done_match_emit_encodeSnappyBetterBlockAsm
-	MOVL SI, R8
-	MOVL SI, 12(SP)
-	LEAQ (DX)(BX*1), R9
-	SUBL BX, R8
-	LEAL -1(R8), BX
-	CMPL BX, $0x3c
+	MOVL DI, R9
+	MOVL DI, 12(SP)
+	LEAQ (BX)(SI*1), R10
+	SUBL SI, R9
+	LEAL -1(R9), SI
+	CMPL SI, $0x3c
 	JB   one_byte_match_emit_encodeSnappyBetterBlockAsm
-	CMPL BX, $0x00000100
+	CMPL SI, $0x00000100
 	JB   two_bytes_match_emit_encodeSnappyBetterBlockAsm
-	CMPL BX, $0x00010000
+	CMPL SI, $0x00010000
 	JB   three_bytes_match_emit_encodeSnappyBetterBlockAsm
-	CMPL BX, $0x01000000
+	CMPL SI, $0x01000000
 	JB   four_bytes_match_emit_encodeSnappyBetterBlockAsm
-	MOVB $0xfc, (AX)
-	MOVL BX, 1(AX)
-	ADDQ $0x05, AX
+	MOVB $0xfc, (CX)
+	MOVL SI, 1(CX)
+	ADDQ $0x05, CX
 	JMP  memmove_long_match_emit_encodeSnappyBetterBlockAsm
 
 four_bytes_match_emit_encodeSnappyBetterBlockAsm:
-	MOVL BX, R10
-	SHRL $0x10, R10
-	MOVB $0xf8, (AX)
-	MOVW BX, 1(AX)
-	MOVB R10, 3(AX)
-	ADDQ $0x04, AX
+	MOVL SI, R11
+	SHRL $0x10, R11
+	MOVB $0xf8, (CX)
+	MOVW SI, 1(CX)
+	MOVB R11, 3(CX)
+	ADDQ $0x04, CX
 	JMP  memmove_long_match_emit_encodeSnappyBetterBlockAsm
 
 three_bytes_match_emit_encodeSnappyBetterBlockAsm:
-	MOVB $0xf4, (AX)
-	MOVW BX, 1(AX)
-	ADDQ $0x03, AX
+	MOVB $0xf4, (CX)
+	MOVW SI, 1(CX)
+	ADDQ $0x03, CX
 	JMP  memmove_long_match_emit_encodeSnappyBetterBlockAsm
 
 two_bytes_match_emit_encodeSnappyBetterBlockAsm:
-	MOVB $0xf0, (AX)
-	MOVB BL, 1(AX)
-	ADDQ $0x02, AX
-	CMPL BX, $0x40
+	MOVB $0xf0, (CX)
+	MOVB SI, 1(CX)
+	ADDQ $0x02, CX
+	CMPL SI, $0x40
 	JB   memmove_match_emit_encodeSnappyBetterBlockAsm
 	JMP  memmove_long_match_emit_encodeSnappyBetterBlockAsm
 
 one_byte_match_emit_encodeSnappyBetterBlockAsm:
-	SHLB $0x02, BL
-	MOVB BL, (AX)
-	ADDQ $0x01, AX
+	SHLB $0x02, SI
+	MOVB SI, (CX)
+	ADDQ $0x01, CX
 
 memmove_match_emit_encodeSnappyBetterBlockAsm:
-	LEAQ (AX)(R8*1), BX
+	LEAQ (CX)(R9*1), SI
 
 	// genMemMoveShort
-	CMPQ R8, $0x08
+	CMPQ R9, $0x08
 	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8
-	CMPQ R8, $0x10
+	CMPQ R9, $0x10
 	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8through16
-	CMPQ R8, $0x20
+	CMPQ R9, $0x20
 	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_17through32
 	JMP  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_33through64
 
 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8:
-	MOVQ (R9), R10
-	MOVQ R10, (AX)
+	MOVQ (R10), R11
+	MOVQ R11, (CX)
 	JMP  memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm
 
 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_8through16:
-	MOVQ (R9), R10
-	MOVQ -8(R9)(R8*1), R9
-	MOVQ R10, (AX)
-	MOVQ R9, -8(AX)(R8*1)
+	MOVQ (R10), R11
+	MOVQ -8(R10)(R9*1), R10
+	MOVQ R11, (CX)
+	MOVQ R10, -8(CX)(R9*1)
 	JMP  memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm
 
 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_17through32:
-	MOVOU (R9), X0
-	MOVOU -16(R9)(R8*1), X1
-	MOVOU X0, (AX)
-	MOVOU X1, -16(AX)(R8*1)
+	MOVOU (R10), X0
+	MOVOU -16(R10)(R9*1), X1
+	MOVOU X0, (CX)
+	MOVOU X1, -16(CX)(R9*1)
 	JMP   memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm
 
 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm_memmove_move_33through64:
-	MOVOU (R9), X0
-	MOVOU 16(R9), X1
-	MOVOU -32(R9)(R8*1), X2
-	MOVOU -16(R9)(R8*1), X3
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(R8*1)
-	MOVOU X3, -16(AX)(R8*1)
+	MOVOU (R10), X0
+	MOVOU 16(R10), X1
+	MOVOU -32(R10)(R9*1), X2
+	MOVOU -16(R10)(R9*1), X3
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R9*1)
+	MOVOU X3, -16(CX)(R9*1)
 
 memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm:
-	MOVQ BX, AX
+	MOVQ SI, CX
 	JMP  emit_literal_done_match_emit_encodeSnappyBetterBlockAsm
 
 memmove_long_match_emit_encodeSnappyBetterBlockAsm:
-	LEAQ (AX)(R8*1), BX
+	LEAQ (CX)(R9*1), SI
 
 	// genMemMoveLong
-	MOVOU (R9), X0
-	MOVOU 16(R9), X1
-	MOVOU -32(R9)(R8*1), X2
-	MOVOU -16(R9)(R8*1), X3
-	MOVQ  R8, R12
-	SHRQ  $0x05, R12
-	MOVQ  AX, R10
-	ANDL  $0x0000001f, R10
-	MOVQ  $0x00000040, R13
-	SUBQ  R10, R13
-	DECQ  R12
+	MOVOU (R10), X0
+	MOVOU 16(R10), X1
+	MOVOU -32(R10)(R9*1), X2
+	MOVOU -16(R10)(R9*1), X3
+	MOVQ  R9, R13
+	SHRQ  $0x05, R13
+	MOVQ  CX, R11
+	ANDL  $0x0000001f, R11
+	MOVQ  $0x00000040, R14
+	SUBQ  R11, R14
+	DECQ  R13
 	JA    emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32
-	LEAQ  -32(R9)(R13*1), R10
-	LEAQ  -32(AX)(R13*1), R14
+	LEAQ  -32(R10)(R14*1), R11
+	LEAQ  -32(CX)(R14*1), R15
 
 emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_big_loop_back:
-	MOVOU (R10), X4
-	MOVOU 16(R10), X5
-	MOVOA X4, (R14)
-	MOVOA X5, 16(R14)
+	MOVOU (R11), X4
+	MOVOU 16(R11), X5
+	MOVOA X4, (R15)
+	MOVOA X5, 16(R15)
+	ADDQ  $0x20, R15
+	ADDQ  $0x20, R11
 	ADDQ  $0x20, R14
-	ADDQ  $0x20, R10
-	ADDQ  $0x20, R13
-	DECQ  R12
+	DECQ  R13
 	JNA   emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_big_loop_back
 
 emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32:
-	MOVOU -32(R9)(R13*1), X4
-	MOVOU -16(R9)(R13*1), X5
-	MOVOA X4, -32(AX)(R13*1)
-	MOVOA X5, -16(AX)(R13*1)
-	ADDQ  $0x20, R13
-	CMPQ  R8, R13
+	MOVOU -32(R10)(R14*1), X4
+	MOVOU -16(R10)(R14*1), X5
+	MOVOA X4, -32(CX)(R14*1)
+	MOVOA X5, -16(CX)(R14*1)
+	ADDQ  $0x20, R14
+	CMPQ  R9, R14
 	JAE   emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(R8*1)
-	MOVOU X3, -16(AX)(R8*1)
-	MOVQ  BX, AX
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R9*1)
+	MOVOU X3, -16(CX)(R9*1)
+	MOVQ  SI, CX
 
 emit_literal_done_match_emit_encodeSnappyBetterBlockAsm:
-	ADDL R11, CX
-	ADDL $0x04, R11
-	MOVL CX, 12(SP)
+	ADDL R12, DX
+	ADDL $0x04, R12
+	MOVL DX, 12(SP)
 
 	// emitCopy
-	CMPL DI, $0x00010000
+	CMPL R8, $0x00010000
 	JB   two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm
 
 four_bytes_loop_back_match_nolit_encodeSnappyBetterBlockAsm:
-	CMPL R11, $0x40
+	CMPL R12, $0x40
 	JBE  four_bytes_remain_match_nolit_encodeSnappyBetterBlockAsm
-	MOVB $0xff, (AX)
-	MOVL DI, 1(AX)
-	LEAL -64(R11), R11
-	ADDQ $0x05, AX
-	CMPL R11, $0x04
+	MOVB $0xff, (CX)
+	MOVL R8, 1(CX)
+	LEAL -64(R12), R12
+	ADDQ $0x05, CX
+	CMPL R12, $0x04
 	JB   four_bytes_remain_match_nolit_encodeSnappyBetterBlockAsm
 	JMP  four_bytes_loop_back_match_nolit_encodeSnappyBetterBlockAsm
 
 four_bytes_remain_match_nolit_encodeSnappyBetterBlockAsm:
-	TESTL R11, R11
+	TESTL R12, R12
 	JZ    match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm
-	XORL  BX, BX
-	LEAL  -1(BX)(R11*4), R11
-	MOVB  R11, (AX)
-	MOVL  DI, 1(AX)
-	ADDQ  $0x05, AX
+	XORL  SI, SI
+	LEAL  -1(SI)(R12*4), R12
+	MOVB  R12, (CX)
+	MOVL  R8, 1(CX)
+	ADDQ  $0x05, CX
 	JMP   match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm
 
 two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm:
-	CMPL R11, $0x40
+	CMPL R12, $0x40
 	JBE  two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm
-	MOVB $0xee, (AX)
-	MOVW DI, 1(AX)
-	LEAL -60(R11), R11
-	ADDQ $0x03, AX
+	MOVB $0xee, (CX)
+	MOVW R8, 1(CX)
+	LEAL -60(R12), R12
+	ADDQ $0x03, CX
 	JMP  two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm
 
 two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm:
-	MOVL R11, BX
-	SHLL $0x02, BX
-	CMPL R11, $0x0c
+	MOVL R12, SI
+	SHLL $0x02, SI
+	CMPL R12, $0x0c
 	JAE  emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm
-	CMPL DI, $0x00000800
+	CMPL R8, $0x00000800
 	JAE  emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm
-	LEAL -15(BX), BX
-	MOVB DI, 1(AX)
-	SHRL $0x08, DI
-	SHLL $0x05, DI
-	ORL  DI, BX
-	MOVB BL, (AX)
-	ADDQ $0x02, AX
+	LEAL -15(SI), SI
+	MOVB R8, 1(CX)
+	SHRL $0x08, R8
+	SHLL $0x05, R8
+	ORL  R8, SI
+	MOVB SI, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm
 
 emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm:
-	LEAL -2(BX), BX
-	MOVB BL, (AX)
-	MOVW DI, 1(AX)
-	ADDQ $0x03, AX
+	LEAL -2(SI), SI
+	MOVB SI, (CX)
+	MOVW R8, 1(CX)
+	ADDQ $0x03, CX
 
 match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm:
-	CMPL CX, 8(SP)
+	CMPL DX, 8(SP)
 	JAE  emit_remainder_encodeSnappyBetterBlockAsm
-	CMPQ AX, (SP)
+	CMPQ CX, (SP)
 	JB   match_nolit_dst_ok_encodeSnappyBetterBlockAsm
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 match_nolit_dst_ok_encodeSnappyBetterBlockAsm:
-	MOVQ  $0x00cf1bbcdcbfa563, BX
-	MOVQ  $0x9e3779b1, DI
-	LEAQ  1(SI), SI
-	LEAQ  -2(CX), R8
-	MOVQ  (DX)(SI*1), R9
-	MOVQ  1(DX)(SI*1), R10
-	MOVQ  (DX)(R8*1), R11
-	MOVQ  1(DX)(R8*1), R12
-	SHLQ  $0x08, R9
-	IMULQ BX, R9
-	SHRQ  $0x2f, R9
-	SHLQ  $0x20, R10
-	IMULQ DI, R10
-	SHRQ  $0x32, R10
-	SHLQ  $0x08, R11
-	IMULQ BX, R11
-	SHRQ  $0x2f, R11
-	SHLQ  $0x20, R12
-	IMULQ DI, R12
-	SHRQ  $0x32, R12
-	LEAQ  1(SI), DI
-	LEAQ  1(R8), R13
-	MOVL  SI, 24(SP)(R9*4)
-	MOVL  R8, 24(SP)(R11*4)
-	MOVL  DI, 524312(SP)(R10*4)
-	MOVL  R13, 524312(SP)(R12*4)
-	LEAQ  1(R8)(SI*1), DI
-	SHRQ  $0x01, DI
-	ADDQ  $0x01, SI
-	SUBQ  $0x01, R8
+	MOVQ  $0x00cf1bbcdcbfa563, SI
+	MOVQ  $0x9e3779b1, R8
+	LEAQ  1(DI), DI
+	LEAQ  -2(DX), R9
+	MOVQ  (BX)(DI*1), R10
+	MOVQ  1(BX)(DI*1), R11
+	MOVQ  (BX)(R9*1), R12
+	MOVQ  1(BX)(R9*1), R13
+	SHLQ  $0x08, R10
+	IMULQ SI, R10
+	SHRQ  $0x2f, R10
+	SHLQ  $0x20, R11
+	IMULQ R8, R11
+	SHRQ  $0x32, R11
+	SHLQ  $0x08, R12
+	IMULQ SI, R12
+	SHRQ  $0x2f, R12
+	SHLQ  $0x20, R13
+	IMULQ R8, R13
+	SHRQ  $0x32, R13
+	LEAQ  1(DI), R8
+	LEAQ  1(R9), R14
+	MOVL  DI, (AX)(R10*4)
+	MOVL  R9, (AX)(R12*4)
+	MOVL  R8, 524288(AX)(R11*4)
+	MOVL  R14, 524288(AX)(R13*4)
+	LEAQ  1(R9)(DI*1), R8
+	SHRQ  $0x01, R8
+	ADDQ  $0x01, DI
+	SUBQ  $0x01, R9
 
 index_loop_encodeSnappyBetterBlockAsm:
-	CMPQ  DI, R8
+	CMPQ  R8, R9
 	JAE   search_loop_encodeSnappyBetterBlockAsm
-	MOVQ  (DX)(SI*1), R9
-	MOVQ  (DX)(DI*1), R10
-	SHLQ  $0x08, R9
-	IMULQ BX, R9
-	SHRQ  $0x2f, R9
+	MOVQ  (BX)(DI*1), R10
+	MOVQ  (BX)(R8*1), R11
 	SHLQ  $0x08, R10
-	IMULQ BX, R10
+	IMULQ SI, R10
 	SHRQ  $0x2f, R10
-	MOVL  SI, 24(SP)(R9*4)
-	MOVL  DI, 24(SP)(R10*4)
-	ADDQ  $0x02, SI
+	SHLQ  $0x08, R11
+	IMULQ SI, R11
+	SHRQ  $0x2f, R11
+	MOVL  DI, (AX)(R10*4)
+	MOVL  R8, (AX)(R11*4)
 	ADDQ  $0x02, DI
+	ADDQ  $0x02, R8
 	JMP   index_loop_encodeSnappyBetterBlockAsm
 
 emit_remainder_encodeSnappyBetterBlockAsm:
-	MOVQ src_len+32(FP), CX
-	SUBL 12(SP), CX
-	LEAQ 5(AX)(CX*1), CX
-	CMPQ CX, (SP)
+	MOVQ src_len+32(FP), AX
+	SUBL 12(SP), AX
+	LEAQ 5(CX)(AX*1), AX
+	CMPQ AX, (SP)
 	JB   emit_remainder_ok_encodeSnappyBetterBlockAsm
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 emit_remainder_ok_encodeSnappyBetterBlockAsm:
-	MOVQ src_len+32(FP), CX
-	MOVL 12(SP), BX
-	CMPL BX, CX
+	MOVQ src_len+32(FP), AX
+	MOVL 12(SP), DX
+	CMPL DX, AX
 	JEQ  emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm
-	MOVL CX, SI
-	MOVL CX, 12(SP)
-	LEAQ (DX)(BX*1), CX
-	SUBL BX, SI
+	MOVL AX, SI
+	MOVL AX, 12(SP)
+	LEAQ (BX)(DX*1), AX
+	SUBL DX, SI
 	LEAL -1(SI), DX
 	CMPL DX, $0x3c
 	JB   one_byte_emit_remainder_encodeSnappyBetterBlockAsm
@@ -15485,41 +15501,41 @@ emit_remainder_ok_encodeSnappyBetterBlockAsm:
 	JB   three_bytes_emit_remainder_encodeSnappyBetterBlockAsm
 	CMPL DX, $0x01000000
 	JB   four_bytes_emit_remainder_encodeSnappyBetterBlockAsm
-	MOVB $0xfc, (AX)
-	MOVL DX, 1(AX)
-	ADDQ $0x05, AX
+	MOVB $0xfc, (CX)
+	MOVL DX, 1(CX)
+	ADDQ $0x05, CX
 	JMP  memmove_long_emit_remainder_encodeSnappyBetterBlockAsm
 
 four_bytes_emit_remainder_encodeSnappyBetterBlockAsm:
 	MOVL DX, BX
 	SHRL $0x10, BX
-	MOVB $0xf8, (AX)
-	MOVW DX, 1(AX)
-	MOVB BL, 3(AX)
-	ADDQ $0x04, AX
+	MOVB $0xf8, (CX)
+	MOVW DX, 1(CX)
+	MOVB BL, 3(CX)
+	ADDQ $0x04, CX
 	JMP  memmove_long_emit_remainder_encodeSnappyBetterBlockAsm
 
 three_bytes_emit_remainder_encodeSnappyBetterBlockAsm:
-	MOVB $0xf4, (AX)
-	MOVW DX, 1(AX)
-	ADDQ $0x03, AX
+	MOVB $0xf4, (CX)
+	MOVW DX, 1(CX)
+	ADDQ $0x03, CX
 	JMP  memmove_long_emit_remainder_encodeSnappyBetterBlockAsm
 
 two_bytes_emit_remainder_encodeSnappyBetterBlockAsm:
-	MOVB $0xf0, (AX)
-	MOVB DL, 1(AX)
-	ADDQ $0x02, AX
+	MOVB $0xf0, (CX)
+	MOVB DL, 1(CX)
+	ADDQ $0x02, CX
 	CMPL DX, $0x40
 	JB   memmove_emit_remainder_encodeSnappyBetterBlockAsm
 	JMP  memmove_long_emit_remainder_encodeSnappyBetterBlockAsm
 
 one_byte_emit_remainder_encodeSnappyBetterBlockAsm:
 	SHLB $0x02, DL
-	MOVB DL, (AX)
-	ADDQ $0x01, AX
+	MOVB DL, (CX)
+	ADDQ $0x01, CX
 
 memmove_emit_remainder_encodeSnappyBetterBlockAsm:
-	LEAQ (AX)(SI*1), DX
+	LEAQ (CX)(SI*1), DX
 	MOVL SI, BX
 
 	// genMemMoveShort
@@ -15535,73 +15551,73 @@ memmove_emit_remainder_encodeSnappyBetterBlockAsm:
 	JMP  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_33through64
 
 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_1or2:
-	MOVB (CX), SI
-	MOVB -1(CX)(BX*1), CL
-	MOVB SI, (AX)
-	MOVB CL, -1(AX)(BX*1)
+	MOVB (AX), SI
+	MOVB -1(AX)(BX*1), AL
+	MOVB SI, (CX)
+	MOVB AL, -1(CX)(BX*1)
 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm
 
 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_3:
-	MOVW (CX), SI
-	MOVB 2(CX), CL
-	MOVW SI, (AX)
-	MOVB CL, 2(AX)
+	MOVW (AX), SI
+	MOVB 2(AX), AL
+	MOVW SI, (CX)
+	MOVB AL, 2(CX)
 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm
 
 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_4through7:
-	MOVL (CX), SI
-	MOVL -4(CX)(BX*1), CX
-	MOVL SI, (AX)
-	MOVL CX, -4(AX)(BX*1)
+	MOVL (AX), SI
+	MOVL -4(AX)(BX*1), AX
+	MOVL SI, (CX)
+	MOVL AX, -4(CX)(BX*1)
 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm
 
 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_8through16:
-	MOVQ (CX), SI
-	MOVQ -8(CX)(BX*1), CX
-	MOVQ SI, (AX)
-	MOVQ CX, -8(AX)(BX*1)
+	MOVQ (AX), SI
+	MOVQ -8(AX)(BX*1), AX
+	MOVQ SI, (CX)
+	MOVQ AX, -8(CX)(BX*1)
 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm
 
 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_17through32:
-	MOVOU (CX), X0
-	MOVOU -16(CX)(BX*1), X1
-	MOVOU X0, (AX)
-	MOVOU X1, -16(AX)(BX*1)
+	MOVOU (AX), X0
+	MOVOU -16(AX)(BX*1), X1
+	MOVOU X0, (CX)
+	MOVOU X1, -16(CX)(BX*1)
 	JMP   memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm
 
 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm_memmove_move_33through64:
-	MOVOU (CX), X0
-	MOVOU 16(CX), X1
-	MOVOU -32(CX)(BX*1), X2
-	MOVOU -16(CX)(BX*1), X3
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(BX*1)
-	MOVOU X3, -16(AX)(BX*1)
+	MOVOU (AX), X0
+	MOVOU 16(AX), X1
+	MOVOU -32(AX)(BX*1), X2
+	MOVOU -16(AX)(BX*1), X3
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(BX*1)
+	MOVOU X3, -16(CX)(BX*1)
 
 memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm:
-	MOVQ DX, AX
+	MOVQ DX, CX
 	JMP  emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm
 
 memmove_long_emit_remainder_encodeSnappyBetterBlockAsm:
-	LEAQ (AX)(SI*1), DX
+	LEAQ (CX)(SI*1), DX
 	MOVL SI, BX
 
 	// genMemMoveLong
-	MOVOU (CX), X0
-	MOVOU 16(CX), X1
-	MOVOU -32(CX)(BX*1), X2
-	MOVOU -16(CX)(BX*1), X3
+	MOVOU (AX), X0
+	MOVOU 16(AX), X1
+	MOVOU -32(AX)(BX*1), X2
+	MOVOU -16(AX)(BX*1), X3
 	MOVQ  BX, DI
 	SHRQ  $0x05, DI
-	MOVQ  AX, SI
+	MOVQ  CX, SI
 	ANDL  $0x0000001f, SI
 	MOVQ  $0x00000040, R8
 	SUBQ  SI, R8
 	DECQ  DI
 	JA    emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32
-	LEAQ  -32(CX)(R8*1), SI
-	LEAQ  -32(AX)(R8*1), R9
+	LEAQ  -32(AX)(R8*1), SI
+	LEAQ  -32(CX)(R8*1), R9
 
 emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_big_loop_back:
 	MOVOU (SI), X4
@@ -15615,463 +15631,464 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_big_loop_ba
 	JNA   emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_big_loop_back
 
 emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32:
-	MOVOU -32(CX)(R8*1), X4
-	MOVOU -16(CX)(R8*1), X5
-	MOVOA X4, -32(AX)(R8*1)
-	MOVOA X5, -16(AX)(R8*1)
+	MOVOU -32(AX)(R8*1), X4
+	MOVOU -16(AX)(R8*1), X5
+	MOVOA X4, -32(CX)(R8*1)
+	MOVOA X5, -16(CX)(R8*1)
 	ADDQ  $0x20, R8
 	CMPQ  BX, R8
 	JAE   emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsmlarge_forward_sse_loop_32
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(BX*1)
-	MOVOU X3, -16(AX)(BX*1)
-	MOVQ  DX, AX
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(BX*1)
+	MOVOU X3, -16(CX)(BX*1)
+	MOVQ  DX, CX
 
 emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm:
-	MOVQ dst_base+0(FP), CX
-	SUBQ CX, AX
-	MOVQ AX, ret+48(FP)
+	MOVQ dst_base+0(FP), AX
+	SUBQ AX, CX
+	MOVQ CX, ret+56(FP)
 	RET
 
-// func encodeSnappyBetterBlockAsm64K(dst []byte, src []byte) int
+// func encodeSnappyBetterBlockAsm64K(dst []byte, src []byte, tmp *[294912]byte) int
 // Requires: BMI, SSE2
-TEXT ·encodeSnappyBetterBlockAsm64K(SB), $327704-56
-	MOVQ dst_base+0(FP), AX
-	MOVQ $0x00000a00, CX
-	LEAQ 24(SP), DX
+TEXT ·encodeSnappyBetterBlockAsm64K(SB), $24-64
+	MOVQ tmp+48(FP), AX
+	MOVQ dst_base+0(FP), CX
+	MOVQ $0x00000900, DX
+	MOVQ AX, BX
 	PXOR X0, X0
 
 zero_loop_encodeSnappyBetterBlockAsm64K:
-	MOVOU X0, (DX)
-	MOVOU X0, 16(DX)
-	MOVOU X0, 32(DX)
-	MOVOU X0, 48(DX)
-	MOVOU X0, 64(DX)
-	MOVOU X0, 80(DX)
-	MOVOU X0, 96(DX)
-	MOVOU X0, 112(DX)
-	ADDQ  $0x80, DX
-	DECQ  CX
+	MOVOU X0, (BX)
+	MOVOU X0, 16(BX)
+	MOVOU X0, 32(BX)
+	MOVOU X0, 48(BX)
+	MOVOU X0, 64(BX)
+	MOVOU X0, 80(BX)
+	MOVOU X0, 96(BX)
+	MOVOU X0, 112(BX)
+	ADDQ  $0x80, BX
+	DECQ  DX
 	JNZ   zero_loop_encodeSnappyBetterBlockAsm64K
 	MOVL  $0x00000000, 12(SP)
-	MOVQ  src_len+32(FP), CX
-	LEAQ  -9(CX), DX
-	LEAQ  -8(CX), BX
-	MOVL  BX, 8(SP)
-	SHRQ  $0x05, CX
-	SUBL  CX, DX
-	LEAQ  (AX)(DX*1), DX
-	MOVQ  DX, (SP)
-	MOVL  $0x00000001, CX
+	MOVQ  src_len+32(FP), DX
+	LEAQ  -9(DX), BX
+	LEAQ  -8(DX), SI
+	MOVL  SI, 8(SP)
+	SHRQ  $0x05, DX
+	SUBL  DX, BX
+	LEAQ  (CX)(BX*1), BX
+	MOVQ  BX, (SP)
+	MOVL  $0x00000001, DX
 	MOVL  $0x00000000, 16(SP)
-	MOVQ  src_base+24(FP), DX
+	MOVQ  src_base+24(FP), BX
 
 search_loop_encodeSnappyBetterBlockAsm64K:
-	MOVL  CX, BX
-	SUBL  12(SP), BX
-	SHRL  $0x07, BX
-	LEAL  1(CX)(BX*1), BX
-	CMPL  BX, 8(SP)
+	MOVL  DX, SI
+	SUBL  12(SP), SI
+	SHRL  $0x07, SI
+	LEAL  1(DX)(SI*1), SI
+	CMPL  SI, 8(SP)
 	JAE   emit_remainder_encodeSnappyBetterBlockAsm64K
-	MOVQ  (DX)(CX*1), SI
-	MOVL  BX, 20(SP)
-	MOVQ  $0x00cf1bbcdcbfa563, R8
-	MOVQ  $0x9e3779b1, BX
-	MOVQ  SI, R9
-	MOVQ  SI, R10
-	SHLQ  $0x08, R9
-	IMULQ R8, R9
-	SHRQ  $0x30, R9
-	SHLQ  $0x20, R10
-	IMULQ BX, R10
-	SHRQ  $0x32, R10
-	MOVL  24(SP)(R9*4), BX
-	MOVL  262168(SP)(R10*4), DI
-	MOVL  CX, 24(SP)(R9*4)
-	MOVL  CX, 262168(SP)(R10*4)
-	MOVQ  (DX)(BX*1), R9
-	MOVQ  (DX)(DI*1), R10
-	CMPQ  R9, SI
+	MOVQ  (BX)(DX*1), DI
+	MOVL  SI, 20(SP)
+	MOVQ  $0x00cf1bbcdcbfa563, R9
+	MOVQ  $0x9e3779b1, SI
+	MOVQ  DI, R10
+	MOVQ  DI, R11
+	SHLQ  $0x08, R10
+	IMULQ R9, R10
+	SHRQ  $0x30, R10
+	SHLQ  $0x20, R11
+	IMULQ SI, R11
+	SHRQ  $0x33, R11
+	MOVL  (AX)(R10*4), SI
+	MOVL  262144(AX)(R11*4), R8
+	MOVL  DX, (AX)(R10*4)
+	MOVL  DX, 262144(AX)(R11*4)
+	MOVQ  (BX)(SI*1), R10
+	MOVQ  (BX)(R8*1), R11
+	CMPQ  R10, DI
 	JEQ   candidate_match_encodeSnappyBetterBlockAsm64K
-	CMPQ  R10, SI
+	CMPQ  R11, DI
 	JNE   no_short_found_encodeSnappyBetterBlockAsm64K
-	MOVL  DI, BX
+	MOVL  R8, SI
 	JMP   candidate_match_encodeSnappyBetterBlockAsm64K
 
 no_short_found_encodeSnappyBetterBlockAsm64K:
-	CMPL R9, SI
+	CMPL R10, DI
 	JEQ  candidate_match_encodeSnappyBetterBlockAsm64K
-	CMPL R10, SI
+	CMPL R11, DI
 	JEQ  candidateS_match_encodeSnappyBetterBlockAsm64K
-	MOVL 20(SP), CX
+	MOVL 20(SP), DX
 	JMP  search_loop_encodeSnappyBetterBlockAsm64K
 
 candidateS_match_encodeSnappyBetterBlockAsm64K:
-	SHRQ  $0x08, SI
-	MOVQ  SI, R9
-	SHLQ  $0x08, R9
-	IMULQ R8, R9
-	SHRQ  $0x30, R9
-	MOVL  24(SP)(R9*4), BX
-	INCL  CX
-	MOVL  CX, 24(SP)(R9*4)
-	CMPL  (DX)(BX*1), SI
+	SHRQ  $0x08, DI
+	MOVQ  DI, R10
+	SHLQ  $0x08, R10
+	IMULQ R9, R10
+	SHRQ  $0x30, R10
+	MOVL  (AX)(R10*4), SI
+	INCL  DX
+	MOVL  DX, (AX)(R10*4)
+	CMPL  (BX)(SI*1), DI
 	JEQ   candidate_match_encodeSnappyBetterBlockAsm64K
-	DECL  CX
-	MOVL  DI, BX
+	DECL  DX
+	MOVL  R8, SI
 
 candidate_match_encodeSnappyBetterBlockAsm64K:
-	MOVL  12(SP), SI
-	TESTL BX, BX
+	MOVL  12(SP), DI
+	TESTL SI, SI
 	JZ    match_extend_back_end_encodeSnappyBetterBlockAsm64K
 
 match_extend_back_loop_encodeSnappyBetterBlockAsm64K:
-	CMPL CX, SI
+	CMPL DX, DI
 	JBE  match_extend_back_end_encodeSnappyBetterBlockAsm64K
-	MOVB -1(DX)(BX*1), DI
-	MOVB -1(DX)(CX*1), R8
-	CMPB DI, R8
+	MOVB -1(BX)(SI*1), R8
+	MOVB -1(BX)(DX*1), R9
+	CMPB R8, R9
 	JNE  match_extend_back_end_encodeSnappyBetterBlockAsm64K
-	LEAL -1(CX), CX
-	DECL BX
+	LEAL -1(DX), DX
+	DECL SI
 	JZ   match_extend_back_end_encodeSnappyBetterBlockAsm64K
 	JMP  match_extend_back_loop_encodeSnappyBetterBlockAsm64K
 
 match_extend_back_end_encodeSnappyBetterBlockAsm64K:
-	MOVL CX, SI
-	SUBL 12(SP), SI
-	LEAQ 3(AX)(SI*1), SI
-	CMPQ SI, (SP)
+	MOVL DX, DI
+	SUBL 12(SP), DI
+	LEAQ 3(CX)(DI*1), DI
+	CMPQ DI, (SP)
 	JB   match_dst_size_check_encodeSnappyBetterBlockAsm64K
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 match_dst_size_check_encodeSnappyBetterBlockAsm64K:
-	MOVL CX, SI
-	ADDL $0x04, CX
-	ADDL $0x04, BX
-	MOVQ src_len+32(FP), DI
-	SUBL CX, DI
-	LEAQ (DX)(CX*1), R8
-	LEAQ (DX)(BX*1), R9
+	MOVL DX, DI
+	ADDL $0x04, DX
+	ADDL $0x04, SI
+	MOVQ src_len+32(FP), R8
+	SUBL DX, R8
+	LEAQ (BX)(DX*1), R9
+	LEAQ (BX)(SI*1), R10
 
 	// matchLen
-	XORL R11, R11
+	XORL R12, R12
 
 matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm64K:
-	CMPL DI, $0x10
+	CMPL R8, $0x10
 	JB   matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm64K
-	MOVQ (R8)(R11*1), R10
-	MOVQ 8(R8)(R11*1), R12
-	XORQ (R9)(R11*1), R10
+	MOVQ (R9)(R12*1), R11
+	MOVQ 8(R9)(R12*1), R13
+	XORQ (R10)(R12*1), R11
 	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm64K
-	XORQ 8(R9)(R11*1), R12
+	XORQ 8(R10)(R12*1), R13
 	JNZ  matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm64K
-	LEAL -16(DI), DI
-	LEAL 16(R11), R11
+	LEAL -16(R8), R8
+	LEAL 16(R12), R12
 	JMP  matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm64K
 
 matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm64K:
 #ifdef GOAMD64_v3
-	TZCNTQ R12, R12
+	TZCNTQ R13, R13
 
 #else
-	BSFQ R12, R12
+	BSFQ R13, R13
 
 #endif
-	SARQ $0x03, R12
-	LEAL 8(R11)(R12*1), R11
+	SARQ $0x03, R13
+	LEAL 8(R12)(R13*1), R12
 	JMP  match_nolit_end_encodeSnappyBetterBlockAsm64K
 
 matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm64K:
-	CMPL DI, $0x08
+	CMPL R8, $0x08
 	JB   matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm64K
-	MOVQ (R8)(R11*1), R10
-	XORQ (R9)(R11*1), R10
+	MOVQ (R9)(R12*1), R11
+	XORQ (R10)(R12*1), R11
 	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm64K
-	LEAL -8(DI), DI
-	LEAL 8(R11), R11
+	LEAL -8(R8), R8
+	LEAL 8(R12), R12
 	JMP  matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm64K
 
 matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm64K:
 #ifdef GOAMD64_v3
-	TZCNTQ R10, R10
+	TZCNTQ R11, R11
 
 #else
-	BSFQ R10, R10
+	BSFQ R11, R11
 
 #endif
-	SARQ $0x03, R10
-	LEAL (R11)(R10*1), R11
+	SARQ $0x03, R11
+	LEAL (R12)(R11*1), R12
 	JMP  match_nolit_end_encodeSnappyBetterBlockAsm64K
 
 matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm64K:
-	CMPL DI, $0x04
+	CMPL R8, $0x04
 	JB   matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm64K
-	MOVL (R8)(R11*1), R10
-	CMPL (R9)(R11*1), R10
+	MOVL (R9)(R12*1), R11
+	CMPL (R10)(R12*1), R11
 	JNE  matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm64K
-	LEAL -4(DI), DI
-	LEAL 4(R11), R11
+	LEAL -4(R8), R8
+	LEAL 4(R12), R12
 
 matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm64K:
-	CMPL DI, $0x01
+	CMPL R8, $0x01
 	JE   matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm64K
 	JB   match_nolit_end_encodeSnappyBetterBlockAsm64K
-	MOVW (R8)(R11*1), R10
-	CMPW (R9)(R11*1), R10
+	MOVW (R9)(R12*1), R11
+	CMPW (R10)(R12*1), R11
 	JNE  matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm64K
-	LEAL 2(R11), R11
-	SUBL $0x02, DI
+	LEAL 2(R12), R12
+	SUBL $0x02, R8
 	JZ   match_nolit_end_encodeSnappyBetterBlockAsm64K
 
 matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm64K:
-	MOVB (R8)(R11*1), R10
-	CMPB (R9)(R11*1), R10
+	MOVB (R9)(R12*1), R11
+	CMPB (R10)(R12*1), R11
 	JNE  match_nolit_end_encodeSnappyBetterBlockAsm64K
-	LEAL 1(R11), R11
+	LEAL 1(R12), R12
 
 match_nolit_end_encodeSnappyBetterBlockAsm64K:
-	MOVL CX, DI
-	SUBL BX, DI
+	MOVL DX, R8
+	SUBL SI, R8
 
 	// Check if repeat
-	MOVL DI, 16(SP)
-	MOVL 12(SP), BX
-	CMPL BX, SI
+	MOVL R8, 16(SP)
+	MOVL 12(SP), SI
+	CMPL SI, DI
 	JEQ  emit_literal_done_match_emit_encodeSnappyBetterBlockAsm64K
-	MOVL SI, R8
-	MOVL SI, 12(SP)
-	LEAQ (DX)(BX*1), R9
-	SUBL BX, R8
-	LEAL -1(R8), BX
-	CMPL BX, $0x3c
+	MOVL DI, R9
+	MOVL DI, 12(SP)
+	LEAQ (BX)(SI*1), R10
+	SUBL SI, R9
+	LEAL -1(R9), SI
+	CMPL SI, $0x3c
 	JB   one_byte_match_emit_encodeSnappyBetterBlockAsm64K
-	CMPL BX, $0x00000100
+	CMPL SI, $0x00000100
 	JB   two_bytes_match_emit_encodeSnappyBetterBlockAsm64K
 	JB   three_bytes_match_emit_encodeSnappyBetterBlockAsm64K
 
 three_bytes_match_emit_encodeSnappyBetterBlockAsm64K:
-	MOVB $0xf4, (AX)
-	MOVW BX, 1(AX)
-	ADDQ $0x03, AX
+	MOVB $0xf4, (CX)
+	MOVW SI, 1(CX)
+	ADDQ $0x03, CX
 	JMP  memmove_long_match_emit_encodeSnappyBetterBlockAsm64K
 
 two_bytes_match_emit_encodeSnappyBetterBlockAsm64K:
-	MOVB $0xf0, (AX)
-	MOVB BL, 1(AX)
-	ADDQ $0x02, AX
-	CMPL BX, $0x40
+	MOVB $0xf0, (CX)
+	MOVB SI, 1(CX)
+	ADDQ $0x02, CX
+	CMPL SI, $0x40
 	JB   memmove_match_emit_encodeSnappyBetterBlockAsm64K
 	JMP  memmove_long_match_emit_encodeSnappyBetterBlockAsm64K
 
 one_byte_match_emit_encodeSnappyBetterBlockAsm64K:
-	SHLB $0x02, BL
-	MOVB BL, (AX)
-	ADDQ $0x01, AX
+	SHLB $0x02, SI
+	MOVB SI, (CX)
+	ADDQ $0x01, CX
 
 memmove_match_emit_encodeSnappyBetterBlockAsm64K:
-	LEAQ (AX)(R8*1), BX
+	LEAQ (CX)(R9*1), SI
 
 	// genMemMoveShort
-	CMPQ R8, $0x08
+	CMPQ R9, $0x08
 	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8
-	CMPQ R8, $0x10
+	CMPQ R9, $0x10
 	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8through16
-	CMPQ R8, $0x20
+	CMPQ R9, $0x20
 	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_17through32
 	JMP  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_33through64
 
 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8:
-	MOVQ (R9), R10
-	MOVQ R10, (AX)
+	MOVQ (R10), R11
+	MOVQ R11, (CX)
 	JMP  memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K
 
 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_8through16:
-	MOVQ (R9), R10
-	MOVQ -8(R9)(R8*1), R9
-	MOVQ R10, (AX)
-	MOVQ R9, -8(AX)(R8*1)
+	MOVQ (R10), R11
+	MOVQ -8(R10)(R9*1), R10
+	MOVQ R11, (CX)
+	MOVQ R10, -8(CX)(R9*1)
 	JMP  memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K
 
 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_17through32:
-	MOVOU (R9), X0
-	MOVOU -16(R9)(R8*1), X1
-	MOVOU X0, (AX)
-	MOVOU X1, -16(AX)(R8*1)
+	MOVOU (R10), X0
+	MOVOU -16(R10)(R9*1), X1
+	MOVOU X0, (CX)
+	MOVOU X1, -16(CX)(R9*1)
 	JMP   memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K
 
 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm64K_memmove_move_33through64:
-	MOVOU (R9), X0
-	MOVOU 16(R9), X1
-	MOVOU -32(R9)(R8*1), X2
-	MOVOU -16(R9)(R8*1), X3
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(R8*1)
-	MOVOU X3, -16(AX)(R8*1)
+	MOVOU (R10), X0
+	MOVOU 16(R10), X1
+	MOVOU -32(R10)(R9*1), X2
+	MOVOU -16(R10)(R9*1), X3
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R9*1)
+	MOVOU X3, -16(CX)(R9*1)
 
 memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm64K:
-	MOVQ BX, AX
+	MOVQ SI, CX
 	JMP  emit_literal_done_match_emit_encodeSnappyBetterBlockAsm64K
 
 memmove_long_match_emit_encodeSnappyBetterBlockAsm64K:
-	LEAQ (AX)(R8*1), BX
+	LEAQ (CX)(R9*1), SI
 
 	// genMemMoveLong
-	MOVOU (R9), X0
-	MOVOU 16(R9), X1
-	MOVOU -32(R9)(R8*1), X2
-	MOVOU -16(R9)(R8*1), X3
-	MOVQ  R8, R12
-	SHRQ  $0x05, R12
-	MOVQ  AX, R10
-	ANDL  $0x0000001f, R10
-	MOVQ  $0x00000040, R13
-	SUBQ  R10, R13
-	DECQ  R12
+	MOVOU (R10), X0
+	MOVOU 16(R10), X1
+	MOVOU -32(R10)(R9*1), X2
+	MOVOU -16(R10)(R9*1), X3
+	MOVQ  R9, R13
+	SHRQ  $0x05, R13
+	MOVQ  CX, R11
+	ANDL  $0x0000001f, R11
+	MOVQ  $0x00000040, R14
+	SUBQ  R11, R14
+	DECQ  R13
 	JA    emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32
-	LEAQ  -32(R9)(R13*1), R10
-	LEAQ  -32(AX)(R13*1), R14
+	LEAQ  -32(R10)(R14*1), R11
+	LEAQ  -32(CX)(R14*1), R15
 
 emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_big_loop_back:
-	MOVOU (R10), X4
-	MOVOU 16(R10), X5
-	MOVOA X4, (R14)
-	MOVOA X5, 16(R14)
+	MOVOU (R11), X4
+	MOVOU 16(R11), X5
+	MOVOA X4, (R15)
+	MOVOA X5, 16(R15)
+	ADDQ  $0x20, R15
+	ADDQ  $0x20, R11
 	ADDQ  $0x20, R14
-	ADDQ  $0x20, R10
-	ADDQ  $0x20, R13
-	DECQ  R12
+	DECQ  R13
 	JNA   emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_big_loop_back
-
-emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32:
-	MOVOU -32(R9)(R13*1), X4
-	MOVOU -16(R9)(R13*1), X5
-	MOVOA X4, -32(AX)(R13*1)
-	MOVOA X5, -16(AX)(R13*1)
-	ADDQ  $0x20, R13
-	CMPQ  R8, R13
+
+emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32:
+	MOVOU -32(R10)(R14*1), X4
+	MOVOU -16(R10)(R14*1), X5
+	MOVOA X4, -32(CX)(R14*1)
+	MOVOA X5, -16(CX)(R14*1)
+	ADDQ  $0x20, R14
+	CMPQ  R9, R14
 	JAE   emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(R8*1)
-	MOVOU X3, -16(AX)(R8*1)
-	MOVQ  BX, AX
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R9*1)
+	MOVOU X3, -16(CX)(R9*1)
+	MOVQ  SI, CX
 
 emit_literal_done_match_emit_encodeSnappyBetterBlockAsm64K:
-	ADDL R11, CX
-	ADDL $0x04, R11
-	MOVL CX, 12(SP)
+	ADDL R12, DX
+	ADDL $0x04, R12
+	MOVL DX, 12(SP)
 
 	// emitCopy
 two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm64K:
-	CMPL R11, $0x40
+	CMPL R12, $0x40
 	JBE  two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm64K
-	MOVB $0xee, (AX)
-	MOVW DI, 1(AX)
-	LEAL -60(R11), R11
-	ADDQ $0x03, AX
+	MOVB $0xee, (CX)
+	MOVW R8, 1(CX)
+	LEAL -60(R12), R12
+	ADDQ $0x03, CX
 	JMP  two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm64K
 
 two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm64K:
-	MOVL R11, BX
-	SHLL $0x02, BX
-	CMPL R11, $0x0c
+	MOVL R12, SI
+	SHLL $0x02, SI
+	CMPL R12, $0x0c
 	JAE  emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm64K
-	CMPL DI, $0x00000800
+	CMPL R8, $0x00000800
 	JAE  emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm64K
-	LEAL -15(BX), BX
-	MOVB DI, 1(AX)
-	SHRL $0x08, DI
-	SHLL $0x05, DI
-	ORL  DI, BX
-	MOVB BL, (AX)
-	ADDQ $0x02, AX
+	LEAL -15(SI), SI
+	MOVB R8, 1(CX)
+	SHRL $0x08, R8
+	SHLL $0x05, R8
+	ORL  R8, SI
+	MOVB SI, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm64K
 
 emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm64K:
-	LEAL -2(BX), BX
-	MOVB BL, (AX)
-	MOVW DI, 1(AX)
-	ADDQ $0x03, AX
+	LEAL -2(SI), SI
+	MOVB SI, (CX)
+	MOVW R8, 1(CX)
+	ADDQ $0x03, CX
 
 match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm64K:
-	CMPL CX, 8(SP)
+	CMPL DX, 8(SP)
 	JAE  emit_remainder_encodeSnappyBetterBlockAsm64K
-	CMPQ AX, (SP)
+	CMPQ CX, (SP)
 	JB   match_nolit_dst_ok_encodeSnappyBetterBlockAsm64K
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 match_nolit_dst_ok_encodeSnappyBetterBlockAsm64K:
-	MOVQ  $0x00cf1bbcdcbfa563, BX
-	MOVQ  $0x9e3779b1, DI
-	LEAQ  1(SI), SI
-	LEAQ  -2(CX), R8
-	MOVQ  (DX)(SI*1), R9
-	MOVQ  1(DX)(SI*1), R10
-	MOVQ  (DX)(R8*1), R11
-	MOVQ  1(DX)(R8*1), R12
-	SHLQ  $0x08, R9
-	IMULQ BX, R9
-	SHRQ  $0x30, R9
-	SHLQ  $0x20, R10
-	IMULQ DI, R10
-	SHRQ  $0x32, R10
-	SHLQ  $0x08, R11
-	IMULQ BX, R11
-	SHRQ  $0x30, R11
-	SHLQ  $0x20, R12
-	IMULQ DI, R12
-	SHRQ  $0x32, R12
-	LEAQ  1(SI), DI
-	LEAQ  1(R8), R13
-	MOVL  SI, 24(SP)(R9*4)
-	MOVL  R8, 24(SP)(R11*4)
-	MOVL  DI, 262168(SP)(R10*4)
-	MOVL  R13, 262168(SP)(R12*4)
-	LEAQ  1(R8)(SI*1), DI
-	SHRQ  $0x01, DI
-	ADDQ  $0x01, SI
-	SUBQ  $0x01, R8
+	MOVQ  $0x00cf1bbcdcbfa563, SI
+	MOVQ  $0x9e3779b1, R8
+	LEAQ  1(DI), DI
+	LEAQ  -2(DX), R9
+	MOVQ  (BX)(DI*1), R10
+	MOVQ  1(BX)(DI*1), R11
+	MOVQ  (BX)(R9*1), R12
+	MOVQ  1(BX)(R9*1), R13
+	SHLQ  $0x08, R10
+	IMULQ SI, R10
+	SHRQ  $0x30, R10
+	SHLQ  $0x20, R11
+	IMULQ R8, R11
+	SHRQ  $0x33, R11
+	SHLQ  $0x08, R12
+	IMULQ SI, R12
+	SHRQ  $0x30, R12
+	SHLQ  $0x20, R13
+	IMULQ R8, R13
+	SHRQ  $0x33, R13
+	LEAQ  1(DI), R8
+	LEAQ  1(R9), R14
+	MOVL  DI, (AX)(R10*4)
+	MOVL  R9, (AX)(R12*4)
+	MOVL  R8, 262144(AX)(R11*4)
+	MOVL  R14, 262144(AX)(R13*4)
+	LEAQ  1(R9)(DI*1), R8
+	SHRQ  $0x01, R8
+	ADDQ  $0x01, DI
+	SUBQ  $0x01, R9
 
 index_loop_encodeSnappyBetterBlockAsm64K:
-	CMPQ  DI, R8
+	CMPQ  R8, R9
 	JAE   search_loop_encodeSnappyBetterBlockAsm64K
-	MOVQ  (DX)(SI*1), R9
-	MOVQ  (DX)(DI*1), R10
-	SHLQ  $0x08, R9
-	IMULQ BX, R9
-	SHRQ  $0x30, R9
+	MOVQ  (BX)(DI*1), R10
+	MOVQ  (BX)(R8*1), R11
 	SHLQ  $0x08, R10
-	IMULQ BX, R10
+	IMULQ SI, R10
 	SHRQ  $0x30, R10
-	MOVL  SI, 24(SP)(R9*4)
-	MOVL  DI, 24(SP)(R10*4)
-	ADDQ  $0x02, SI
+	SHLQ  $0x08, R11
+	IMULQ SI, R11
+	SHRQ  $0x30, R11
+	MOVL  DI, (AX)(R10*4)
+	MOVL  R8, (AX)(R11*4)
 	ADDQ  $0x02, DI
+	ADDQ  $0x02, R8
 	JMP   index_loop_encodeSnappyBetterBlockAsm64K
 
 emit_remainder_encodeSnappyBetterBlockAsm64K:
-	MOVQ src_len+32(FP), CX
-	SUBL 12(SP), CX
-	LEAQ 3(AX)(CX*1), CX
-	CMPQ CX, (SP)
+	MOVQ src_len+32(FP), AX
+	SUBL 12(SP), AX
+	LEAQ 3(CX)(AX*1), AX
+	CMPQ AX, (SP)
 	JB   emit_remainder_ok_encodeSnappyBetterBlockAsm64K
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 emit_remainder_ok_encodeSnappyBetterBlockAsm64K:
-	MOVQ src_len+32(FP), CX
-	MOVL 12(SP), BX
-	CMPL BX, CX
+	MOVQ src_len+32(FP), AX
+	MOVL 12(SP), DX
+	CMPL DX, AX
 	JEQ  emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm64K
-	MOVL CX, SI
-	MOVL CX, 12(SP)
-	LEAQ (DX)(BX*1), CX
-	SUBL BX, SI
+	MOVL AX, SI
+	MOVL AX, 12(SP)
+	LEAQ (BX)(DX*1), AX
+	SUBL DX, SI
 	LEAL -1(SI), DX
 	CMPL DX, $0x3c
 	JB   one_byte_emit_remainder_encodeSnappyBetterBlockAsm64K
@@ -16080,26 +16097,26 @@ emit_remainder_ok_encodeSnappyBetterBlockAsm64K:
 	JB   three_bytes_emit_remainder_encodeSnappyBetterBlockAsm64K
 
 three_bytes_emit_remainder_encodeSnappyBetterBlockAsm64K:
-	MOVB $0xf4, (AX)
-	MOVW DX, 1(AX)
-	ADDQ $0x03, AX
+	MOVB $0xf4, (CX)
+	MOVW DX, 1(CX)
+	ADDQ $0x03, CX
 	JMP  memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64K
 
 two_bytes_emit_remainder_encodeSnappyBetterBlockAsm64K:
-	MOVB $0xf0, (AX)
-	MOVB DL, 1(AX)
-	ADDQ $0x02, AX
+	MOVB $0xf0, (CX)
+	MOVB DL, 1(CX)
+	ADDQ $0x02, CX
 	CMPL DX, $0x40
 	JB   memmove_emit_remainder_encodeSnappyBetterBlockAsm64K
 	JMP  memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64K
 
 one_byte_emit_remainder_encodeSnappyBetterBlockAsm64K:
 	SHLB $0x02, DL
-	MOVB DL, (AX)
-	ADDQ $0x01, AX
+	MOVB DL, (CX)
+	ADDQ $0x01, CX
 
 memmove_emit_remainder_encodeSnappyBetterBlockAsm64K:
-	LEAQ (AX)(SI*1), DX
+	LEAQ (CX)(SI*1), DX
 	MOVL SI, BX
 
 	// genMemMoveShort
@@ -16115,73 +16132,73 @@ memmove_emit_remainder_encodeSnappyBetterBlockAsm64K:
 	JMP  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_33through64
 
 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_1or2:
-	MOVB (CX), SI
-	MOVB -1(CX)(BX*1), CL
-	MOVB SI, (AX)
-	MOVB CL, -1(AX)(BX*1)
+	MOVB (AX), SI
+	MOVB -1(AX)(BX*1), AL
+	MOVB SI, (CX)
+	MOVB AL, -1(CX)(BX*1)
 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K
 
 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_3:
-	MOVW (CX), SI
-	MOVB 2(CX), CL
-	MOVW SI, (AX)
-	MOVB CL, 2(AX)
+	MOVW (AX), SI
+	MOVB 2(AX), AL
+	MOVW SI, (CX)
+	MOVB AL, 2(CX)
 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K
 
 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_4through7:
-	MOVL (CX), SI
-	MOVL -4(CX)(BX*1), CX
-	MOVL SI, (AX)
-	MOVL CX, -4(AX)(BX*1)
+	MOVL (AX), SI
+	MOVL -4(AX)(BX*1), AX
+	MOVL SI, (CX)
+	MOVL AX, -4(CX)(BX*1)
 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K
 
 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_8through16:
-	MOVQ (CX), SI
-	MOVQ -8(CX)(BX*1), CX
-	MOVQ SI, (AX)
-	MOVQ CX, -8(AX)(BX*1)
+	MOVQ (AX), SI
+	MOVQ -8(AX)(BX*1), AX
+	MOVQ SI, (CX)
+	MOVQ AX, -8(CX)(BX*1)
 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K
 
 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_17through32:
-	MOVOU (CX), X0
-	MOVOU -16(CX)(BX*1), X1
-	MOVOU X0, (AX)
-	MOVOU X1, -16(AX)(BX*1)
+	MOVOU (AX), X0
+	MOVOU -16(AX)(BX*1), X1
+	MOVOU X0, (CX)
+	MOVOU X1, -16(CX)(BX*1)
 	JMP   memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K
 
 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm64K_memmove_move_33through64:
-	MOVOU (CX), X0
-	MOVOU 16(CX), X1
-	MOVOU -32(CX)(BX*1), X2
-	MOVOU -16(CX)(BX*1), X3
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(BX*1)
-	MOVOU X3, -16(AX)(BX*1)
+	MOVOU (AX), X0
+	MOVOU 16(AX), X1
+	MOVOU -32(AX)(BX*1), X2
+	MOVOU -16(AX)(BX*1), X3
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(BX*1)
+	MOVOU X3, -16(CX)(BX*1)
 
 memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm64K:
-	MOVQ DX, AX
+	MOVQ DX, CX
 	JMP  emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm64K
 
 memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64K:
-	LEAQ (AX)(SI*1), DX
+	LEAQ (CX)(SI*1), DX
 	MOVL SI, BX
 
 	// genMemMoveLong
-	MOVOU (CX), X0
-	MOVOU 16(CX), X1
-	MOVOU -32(CX)(BX*1), X2
-	MOVOU -16(CX)(BX*1), X3
+	MOVOU (AX), X0
+	MOVOU 16(AX), X1
+	MOVOU -32(AX)(BX*1), X2
+	MOVOU -16(AX)(BX*1), X3
 	MOVQ  BX, DI
 	SHRQ  $0x05, DI
-	MOVQ  AX, SI
+	MOVQ  CX, SI
 	ANDL  $0x0000001f, SI
 	MOVQ  $0x00000040, R8
 	SUBQ  SI, R8
 	DECQ  DI
 	JA    emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32
-	LEAQ  -32(CX)(R8*1), SI
-	LEAQ  -32(AX)(R8*1), R9
+	LEAQ  -32(AX)(R8*1), SI
+	LEAQ  -32(CX)(R8*1), R9
 
 emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_big_loop_back:
 	MOVOU (SI), X4
@@ -16195,463 +16212,464 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_big_loop
 	JNA   emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_big_loop_back
 
 emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32:
-	MOVOU -32(CX)(R8*1), X4
-	MOVOU -16(CX)(R8*1), X5
-	MOVOA X4, -32(AX)(R8*1)
-	MOVOA X5, -16(AX)(R8*1)
+	MOVOU -32(AX)(R8*1), X4
+	MOVOU -16(AX)(R8*1), X5
+	MOVOA X4, -32(CX)(R8*1)
+	MOVOA X5, -16(CX)(R8*1)
 	ADDQ  $0x20, R8
 	CMPQ  BX, R8
 	JAE   emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm64Klarge_forward_sse_loop_32
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(BX*1)
-	MOVOU X3, -16(AX)(BX*1)
-	MOVQ  DX, AX
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(BX*1)
+	MOVOU X3, -16(CX)(BX*1)
+	MOVQ  DX, CX
 
 emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm64K:
-	MOVQ dst_base+0(FP), CX
-	SUBQ CX, AX
-	MOVQ AX, ret+48(FP)
+	MOVQ dst_base+0(FP), AX
+	SUBQ AX, CX
+	MOVQ CX, ret+56(FP)
 	RET
 
-// func encodeSnappyBetterBlockAsm12B(dst []byte, src []byte) int
+// func encodeSnappyBetterBlockAsm12B(dst []byte, src []byte, tmp *[81920]byte) int
 // Requires: BMI, SSE2
-TEXT ·encodeSnappyBetterBlockAsm12B(SB), $81944-56
-	MOVQ dst_base+0(FP), AX
-	MOVQ $0x00000280, CX
-	LEAQ 24(SP), DX
+TEXT ·encodeSnappyBetterBlockAsm12B(SB), $24-64
+	MOVQ tmp+48(FP), AX
+	MOVQ dst_base+0(FP), CX
+	MOVQ $0x00000280, DX
+	MOVQ AX, BX
 	PXOR X0, X0
 
 zero_loop_encodeSnappyBetterBlockAsm12B:
-	MOVOU X0, (DX)
-	MOVOU X0, 16(DX)
-	MOVOU X0, 32(DX)
-	MOVOU X0, 48(DX)
-	MOVOU X0, 64(DX)
-	MOVOU X0, 80(DX)
-	MOVOU X0, 96(DX)
-	MOVOU X0, 112(DX)
-	ADDQ  $0x80, DX
-	DECQ  CX
+	MOVOU X0, (BX)
+	MOVOU X0, 16(BX)
+	MOVOU X0, 32(BX)
+	MOVOU X0, 48(BX)
+	MOVOU X0, 64(BX)
+	MOVOU X0, 80(BX)
+	MOVOU X0, 96(BX)
+	MOVOU X0, 112(BX)
+	ADDQ  $0x80, BX
+	DECQ  DX
 	JNZ   zero_loop_encodeSnappyBetterBlockAsm12B
 	MOVL  $0x00000000, 12(SP)
-	MOVQ  src_len+32(FP), CX
-	LEAQ  -9(CX), DX
-	LEAQ  -8(CX), BX
-	MOVL  BX, 8(SP)
-	SHRQ  $0x05, CX
-	SUBL  CX, DX
-	LEAQ  (AX)(DX*1), DX
-	MOVQ  DX, (SP)
-	MOVL  $0x00000001, CX
+	MOVQ  src_len+32(FP), DX
+	LEAQ  -9(DX), BX
+	LEAQ  -8(DX), SI
+	MOVL  SI, 8(SP)
+	SHRQ  $0x05, DX
+	SUBL  DX, BX
+	LEAQ  (CX)(BX*1), BX
+	MOVQ  BX, (SP)
+	MOVL  $0x00000001, DX
 	MOVL  $0x00000000, 16(SP)
-	MOVQ  src_base+24(FP), DX
+	MOVQ  src_base+24(FP), BX
 
 search_loop_encodeSnappyBetterBlockAsm12B:
-	MOVL  CX, BX
-	SUBL  12(SP), BX
-	SHRL  $0x06, BX
-	LEAL  1(CX)(BX*1), BX
-	CMPL  BX, 8(SP)
+	MOVL  DX, SI
+	SUBL  12(SP), SI
+	SHRL  $0x06, SI
+	LEAL  1(DX)(SI*1), SI
+	CMPL  SI, 8(SP)
 	JAE   emit_remainder_encodeSnappyBetterBlockAsm12B
-	MOVQ  (DX)(CX*1), SI
-	MOVL  BX, 20(SP)
-	MOVQ  $0x0000cf1bbcdcbf9b, R8
-	MOVQ  $0x9e3779b1, BX
-	MOVQ  SI, R9
-	MOVQ  SI, R10
-	SHLQ  $0x10, R9
-	IMULQ R8, R9
-	SHRQ  $0x32, R9
-	SHLQ  $0x20, R10
-	IMULQ BX, R10
-	SHRQ  $0x34, R10
-	MOVL  24(SP)(R9*4), BX
-	MOVL  65560(SP)(R10*4), DI
-	MOVL  CX, 24(SP)(R9*4)
-	MOVL  CX, 65560(SP)(R10*4)
-	MOVQ  (DX)(BX*1), R9
-	MOVQ  (DX)(DI*1), R10
-	CMPQ  R9, SI
+	MOVQ  (BX)(DX*1), DI
+	MOVL  SI, 20(SP)
+	MOVQ  $0x0000cf1bbcdcbf9b, R9
+	MOVQ  $0x9e3779b1, SI
+	MOVQ  DI, R10
+	MOVQ  DI, R11
+	SHLQ  $0x10, R10
+	IMULQ R9, R10
+	SHRQ  $0x32, R10
+	SHLQ  $0x20, R11
+	IMULQ SI, R11
+	SHRQ  $0x34, R11
+	MOVL  (AX)(R10*4), SI
+	MOVL  65536(AX)(R11*4), R8
+	MOVL  DX, (AX)(R10*4)
+	MOVL  DX, 65536(AX)(R11*4)
+	MOVQ  (BX)(SI*1), R10
+	MOVQ  (BX)(R8*1), R11
+	CMPQ  R10, DI
 	JEQ   candidate_match_encodeSnappyBetterBlockAsm12B
-	CMPQ  R10, SI
+	CMPQ  R11, DI
 	JNE   no_short_found_encodeSnappyBetterBlockAsm12B
-	MOVL  DI, BX
+	MOVL  R8, SI
 	JMP   candidate_match_encodeSnappyBetterBlockAsm12B
 
 no_short_found_encodeSnappyBetterBlockAsm12B:
-	CMPL R9, SI
+	CMPL R10, DI
 	JEQ  candidate_match_encodeSnappyBetterBlockAsm12B
-	CMPL R10, SI
+	CMPL R11, DI
 	JEQ  candidateS_match_encodeSnappyBetterBlockAsm12B
-	MOVL 20(SP), CX
+	MOVL 20(SP), DX
 	JMP  search_loop_encodeSnappyBetterBlockAsm12B
 
 candidateS_match_encodeSnappyBetterBlockAsm12B:
-	SHRQ  $0x08, SI
-	MOVQ  SI, R9
-	SHLQ  $0x10, R9
-	IMULQ R8, R9
-	SHRQ  $0x32, R9
-	MOVL  24(SP)(R9*4), BX
-	INCL  CX
-	MOVL  CX, 24(SP)(R9*4)
-	CMPL  (DX)(BX*1), SI
+	SHRQ  $0x08, DI
+	MOVQ  DI, R10
+	SHLQ  $0x10, R10
+	IMULQ R9, R10
+	SHRQ  $0x32, R10
+	MOVL  (AX)(R10*4), SI
+	INCL  DX
+	MOVL  DX, (AX)(R10*4)
+	CMPL  (BX)(SI*1), DI
 	JEQ   candidate_match_encodeSnappyBetterBlockAsm12B
-	DECL  CX
-	MOVL  DI, BX
+	DECL  DX
+	MOVL  R8, SI
 
 candidate_match_encodeSnappyBetterBlockAsm12B:
-	MOVL  12(SP), SI
-	TESTL BX, BX
+	MOVL  12(SP), DI
+	TESTL SI, SI
 	JZ    match_extend_back_end_encodeSnappyBetterBlockAsm12B
 
 match_extend_back_loop_encodeSnappyBetterBlockAsm12B:
-	CMPL CX, SI
+	CMPL DX, DI
 	JBE  match_extend_back_end_encodeSnappyBetterBlockAsm12B
-	MOVB -1(DX)(BX*1), DI
-	MOVB -1(DX)(CX*1), R8
-	CMPB DI, R8
+	MOVB -1(BX)(SI*1), R8
+	MOVB -1(BX)(DX*1), R9
+	CMPB R8, R9
 	JNE  match_extend_back_end_encodeSnappyBetterBlockAsm12B
-	LEAL -1(CX), CX
-	DECL BX
+	LEAL -1(DX), DX
+	DECL SI
 	JZ   match_extend_back_end_encodeSnappyBetterBlockAsm12B
 	JMP  match_extend_back_loop_encodeSnappyBetterBlockAsm12B
 
 match_extend_back_end_encodeSnappyBetterBlockAsm12B:
-	MOVL CX, SI
-	SUBL 12(SP), SI
-	LEAQ 3(AX)(SI*1), SI
-	CMPQ SI, (SP)
+	MOVL DX, DI
+	SUBL 12(SP), DI
+	LEAQ 3(CX)(DI*1), DI
+	CMPQ DI, (SP)
 	JB   match_dst_size_check_encodeSnappyBetterBlockAsm12B
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 match_dst_size_check_encodeSnappyBetterBlockAsm12B:
-	MOVL CX, SI
-	ADDL $0x04, CX
-	ADDL $0x04, BX
-	MOVQ src_len+32(FP), DI
-	SUBL CX, DI
-	LEAQ (DX)(CX*1), R8
-	LEAQ (DX)(BX*1), R9
+	MOVL DX, DI
+	ADDL $0x04, DX
+	ADDL $0x04, SI
+	MOVQ src_len+32(FP), R8
+	SUBL DX, R8
+	LEAQ (BX)(DX*1), R9
+	LEAQ (BX)(SI*1), R10
 
 	// matchLen
-	XORL R11, R11
+	XORL R12, R12
 
 matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm12B:
-	CMPL DI, $0x10
+	CMPL R8, $0x10
 	JB   matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm12B
-	MOVQ (R8)(R11*1), R10
-	MOVQ 8(R8)(R11*1), R12
-	XORQ (R9)(R11*1), R10
+	MOVQ (R9)(R12*1), R11
+	MOVQ 8(R9)(R12*1), R13
+	XORQ (R10)(R12*1), R11
 	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm12B
-	XORQ 8(R9)(R11*1), R12
+	XORQ 8(R10)(R12*1), R13
 	JNZ  matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm12B
-	LEAL -16(DI), DI
-	LEAL 16(R11), R11
+	LEAL -16(R8), R8
+	LEAL 16(R12), R12
 	JMP  matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm12B
 
 matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm12B:
 #ifdef GOAMD64_v3
-	TZCNTQ R12, R12
+	TZCNTQ R13, R13
 
 #else
-	BSFQ R12, R12
+	BSFQ R13, R13
 
 #endif
-	SARQ $0x03, R12
-	LEAL 8(R11)(R12*1), R11
+	SARQ $0x03, R13
+	LEAL 8(R12)(R13*1), R12
 	JMP  match_nolit_end_encodeSnappyBetterBlockAsm12B
 
 matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm12B:
-	CMPL DI, $0x08
+	CMPL R8, $0x08
 	JB   matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm12B
-	MOVQ (R8)(R11*1), R10
-	XORQ (R9)(R11*1), R10
+	MOVQ (R9)(R12*1), R11
+	XORQ (R10)(R12*1), R11
 	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm12B
-	LEAL -8(DI), DI
-	LEAL 8(R11), R11
+	LEAL -8(R8), R8
+	LEAL 8(R12), R12
 	JMP  matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm12B
 
 matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm12B:
 #ifdef GOAMD64_v3
-	TZCNTQ R10, R10
+	TZCNTQ R11, R11
 
 #else
-	BSFQ R10, R10
+	BSFQ R11, R11
 
 #endif
-	SARQ $0x03, R10
-	LEAL (R11)(R10*1), R11
+	SARQ $0x03, R11
+	LEAL (R12)(R11*1), R12
 	JMP  match_nolit_end_encodeSnappyBetterBlockAsm12B
 
 matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm12B:
-	CMPL DI, $0x04
+	CMPL R8, $0x04
 	JB   matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm12B
-	MOVL (R8)(R11*1), R10
-	CMPL (R9)(R11*1), R10
+	MOVL (R9)(R12*1), R11
+	CMPL (R10)(R12*1), R11
 	JNE  matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm12B
-	LEAL -4(DI), DI
-	LEAL 4(R11), R11
+	LEAL -4(R8), R8
+	LEAL 4(R12), R12
 
 matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm12B:
-	CMPL DI, $0x01
+	CMPL R8, $0x01
 	JE   matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm12B
 	JB   match_nolit_end_encodeSnappyBetterBlockAsm12B
-	MOVW (R8)(R11*1), R10
-	CMPW (R9)(R11*1), R10
+	MOVW (R9)(R12*1), R11
+	CMPW (R10)(R12*1), R11
 	JNE  matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm12B
-	LEAL 2(R11), R11
-	SUBL $0x02, DI
+	LEAL 2(R12), R12
+	SUBL $0x02, R8
 	JZ   match_nolit_end_encodeSnappyBetterBlockAsm12B
 
 matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm12B:
-	MOVB (R8)(R11*1), R10
-	CMPB (R9)(R11*1), R10
+	MOVB (R9)(R12*1), R11
+	CMPB (R10)(R12*1), R11
 	JNE  match_nolit_end_encodeSnappyBetterBlockAsm12B
-	LEAL 1(R11), R11
+	LEAL 1(R12), R12
 
 match_nolit_end_encodeSnappyBetterBlockAsm12B:
-	MOVL CX, DI
-	SUBL BX, DI
+	MOVL DX, R8
+	SUBL SI, R8
 
 	// Check if repeat
-	MOVL DI, 16(SP)
-	MOVL 12(SP), BX
-	CMPL BX, SI
+	MOVL R8, 16(SP)
+	MOVL 12(SP), SI
+	CMPL SI, DI
 	JEQ  emit_literal_done_match_emit_encodeSnappyBetterBlockAsm12B
-	MOVL SI, R8
-	MOVL SI, 12(SP)
-	LEAQ (DX)(BX*1), R9
-	SUBL BX, R8
-	LEAL -1(R8), BX
-	CMPL BX, $0x3c
+	MOVL DI, R9
+	MOVL DI, 12(SP)
+	LEAQ (BX)(SI*1), R10
+	SUBL SI, R9
+	LEAL -1(R9), SI
+	CMPL SI, $0x3c
 	JB   one_byte_match_emit_encodeSnappyBetterBlockAsm12B
-	CMPL BX, $0x00000100
+	CMPL SI, $0x00000100
 	JB   two_bytes_match_emit_encodeSnappyBetterBlockAsm12B
 	JB   three_bytes_match_emit_encodeSnappyBetterBlockAsm12B
 
 three_bytes_match_emit_encodeSnappyBetterBlockAsm12B:
-	MOVB $0xf4, (AX)
-	MOVW BX, 1(AX)
-	ADDQ $0x03, AX
+	MOVB $0xf4, (CX)
+	MOVW SI, 1(CX)
+	ADDQ $0x03, CX
 	JMP  memmove_long_match_emit_encodeSnappyBetterBlockAsm12B
 
 two_bytes_match_emit_encodeSnappyBetterBlockAsm12B:
-	MOVB $0xf0, (AX)
-	MOVB BL, 1(AX)
-	ADDQ $0x02, AX
-	CMPL BX, $0x40
+	MOVB $0xf0, (CX)
+	MOVB SI, 1(CX)
+	ADDQ $0x02, CX
+	CMPL SI, $0x40
 	JB   memmove_match_emit_encodeSnappyBetterBlockAsm12B
 	JMP  memmove_long_match_emit_encodeSnappyBetterBlockAsm12B
 
 one_byte_match_emit_encodeSnappyBetterBlockAsm12B:
-	SHLB $0x02, BL
-	MOVB BL, (AX)
-	ADDQ $0x01, AX
+	SHLB $0x02, SI
+	MOVB SI, (CX)
+	ADDQ $0x01, CX
 
 memmove_match_emit_encodeSnappyBetterBlockAsm12B:
-	LEAQ (AX)(R8*1), BX
+	LEAQ (CX)(R9*1), SI
 
 	// genMemMoveShort
-	CMPQ R8, $0x08
+	CMPQ R9, $0x08
 	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8
-	CMPQ R8, $0x10
+	CMPQ R9, $0x10
 	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8through16
-	CMPQ R8, $0x20
+	CMPQ R9, $0x20
 	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_17through32
 	JMP  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_33through64
 
 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8:
-	MOVQ (R9), R10
-	MOVQ R10, (AX)
+	MOVQ (R10), R11
+	MOVQ R11, (CX)
 	JMP  memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B
 
 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_8through16:
-	MOVQ (R9), R10
-	MOVQ -8(R9)(R8*1), R9
-	MOVQ R10, (AX)
-	MOVQ R9, -8(AX)(R8*1)
+	MOVQ (R10), R11
+	MOVQ -8(R10)(R9*1), R10
+	MOVQ R11, (CX)
+	MOVQ R10, -8(CX)(R9*1)
 	JMP  memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B
 
 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_17through32:
-	MOVOU (R9), X0
-	MOVOU -16(R9)(R8*1), X1
-	MOVOU X0, (AX)
-	MOVOU X1, -16(AX)(R8*1)
+	MOVOU (R10), X0
+	MOVOU -16(R10)(R9*1), X1
+	MOVOU X0, (CX)
+	MOVOU X1, -16(CX)(R9*1)
 	JMP   memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B
 
 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm12B_memmove_move_33through64:
-	MOVOU (R9), X0
-	MOVOU 16(R9), X1
-	MOVOU -32(R9)(R8*1), X2
-	MOVOU -16(R9)(R8*1), X3
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(R8*1)
-	MOVOU X3, -16(AX)(R8*1)
+	MOVOU (R10), X0
+	MOVOU 16(R10), X1
+	MOVOU -32(R10)(R9*1), X2
+	MOVOU -16(R10)(R9*1), X3
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R9*1)
+	MOVOU X3, -16(CX)(R9*1)
 
 memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm12B:
-	MOVQ BX, AX
+	MOVQ SI, CX
 	JMP  emit_literal_done_match_emit_encodeSnappyBetterBlockAsm12B
 
 memmove_long_match_emit_encodeSnappyBetterBlockAsm12B:
-	LEAQ (AX)(R8*1), BX
+	LEAQ (CX)(R9*1), SI
 
 	// genMemMoveLong
-	MOVOU (R9), X0
-	MOVOU 16(R9), X1
-	MOVOU -32(R9)(R8*1), X2
-	MOVOU -16(R9)(R8*1), X3
-	MOVQ  R8, R12
-	SHRQ  $0x05, R12
-	MOVQ  AX, R10
-	ANDL  $0x0000001f, R10
-	MOVQ  $0x00000040, R13
-	SUBQ  R10, R13
-	DECQ  R12
+	MOVOU (R10), X0
+	MOVOU 16(R10), X1
+	MOVOU -32(R10)(R9*1), X2
+	MOVOU -16(R10)(R9*1), X3
+	MOVQ  R9, R13
+	SHRQ  $0x05, R13
+	MOVQ  CX, R11
+	ANDL  $0x0000001f, R11
+	MOVQ  $0x00000040, R14
+	SUBQ  R11, R14
+	DECQ  R13
 	JA    emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32
-	LEAQ  -32(R9)(R13*1), R10
-	LEAQ  -32(AX)(R13*1), R14
+	LEAQ  -32(R10)(R14*1), R11
+	LEAQ  -32(CX)(R14*1), R15
 
 emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_big_loop_back:
-	MOVOU (R10), X4
-	MOVOU 16(R10), X5
-	MOVOA X4, (R14)
-	MOVOA X5, 16(R14)
+	MOVOU (R11), X4
+	MOVOU 16(R11), X5
+	MOVOA X4, (R15)
+	MOVOA X5, 16(R15)
+	ADDQ  $0x20, R15
+	ADDQ  $0x20, R11
 	ADDQ  $0x20, R14
-	ADDQ  $0x20, R10
-	ADDQ  $0x20, R13
-	DECQ  R12
+	DECQ  R13
 	JNA   emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_big_loop_back
 
 emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32:
-	MOVOU -32(R9)(R13*1), X4
-	MOVOU -16(R9)(R13*1), X5
-	MOVOA X4, -32(AX)(R13*1)
-	MOVOA X5, -16(AX)(R13*1)
-	ADDQ  $0x20, R13
-	CMPQ  R8, R13
+	MOVOU -32(R10)(R14*1), X4
+	MOVOU -16(R10)(R14*1), X5
+	MOVOA X4, -32(CX)(R14*1)
+	MOVOA X5, -16(CX)(R14*1)
+	ADDQ  $0x20, R14
+	CMPQ  R9, R14
 	JAE   emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(R8*1)
-	MOVOU X3, -16(AX)(R8*1)
-	MOVQ  BX, AX
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R9*1)
+	MOVOU X3, -16(CX)(R9*1)
+	MOVQ  SI, CX
 
 emit_literal_done_match_emit_encodeSnappyBetterBlockAsm12B:
-	ADDL R11, CX
-	ADDL $0x04, R11
-	MOVL CX, 12(SP)
+	ADDL R12, DX
+	ADDL $0x04, R12
+	MOVL DX, 12(SP)
 
 	// emitCopy
 two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm12B:
-	CMPL R11, $0x40
+	CMPL R12, $0x40
 	JBE  two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm12B
-	MOVB $0xee, (AX)
-	MOVW DI, 1(AX)
-	LEAL -60(R11), R11
-	ADDQ $0x03, AX
+	MOVB $0xee, (CX)
+	MOVW R8, 1(CX)
+	LEAL -60(R12), R12
+	ADDQ $0x03, CX
 	JMP  two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm12B
 
 two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm12B:
-	MOVL R11, BX
-	SHLL $0x02, BX
-	CMPL R11, $0x0c
+	MOVL R12, SI
+	SHLL $0x02, SI
+	CMPL R12, $0x0c
 	JAE  emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm12B
-	CMPL DI, $0x00000800
+	CMPL R8, $0x00000800
 	JAE  emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm12B
-	LEAL -15(BX), BX
-	MOVB DI, 1(AX)
-	SHRL $0x08, DI
-	SHLL $0x05, DI
-	ORL  DI, BX
-	MOVB BL, (AX)
-	ADDQ $0x02, AX
+	LEAL -15(SI), SI
+	MOVB R8, 1(CX)
+	SHRL $0x08, R8
+	SHLL $0x05, R8
+	ORL  R8, SI
+	MOVB SI, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm12B
 
 emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm12B:
-	LEAL -2(BX), BX
-	MOVB BL, (AX)
-	MOVW DI, 1(AX)
-	ADDQ $0x03, AX
+	LEAL -2(SI), SI
+	MOVB SI, (CX)
+	MOVW R8, 1(CX)
+	ADDQ $0x03, CX
 
 match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm12B:
-	CMPL CX, 8(SP)
+	CMPL DX, 8(SP)
 	JAE  emit_remainder_encodeSnappyBetterBlockAsm12B
-	CMPQ AX, (SP)
+	CMPQ CX, (SP)
 	JB   match_nolit_dst_ok_encodeSnappyBetterBlockAsm12B
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 match_nolit_dst_ok_encodeSnappyBetterBlockAsm12B:
-	MOVQ  $0x0000cf1bbcdcbf9b, BX
-	MOVQ  $0x9e3779b1, DI
-	LEAQ  1(SI), SI
-	LEAQ  -2(CX), R8
-	MOVQ  (DX)(SI*1), R9
-	MOVQ  1(DX)(SI*1), R10
-	MOVQ  (DX)(R8*1), R11
-	MOVQ  1(DX)(R8*1), R12
-	SHLQ  $0x10, R9
-	IMULQ BX, R9
-	SHRQ  $0x32, R9
-	SHLQ  $0x20, R10
-	IMULQ DI, R10
-	SHRQ  $0x34, R10
-	SHLQ  $0x10, R11
-	IMULQ BX, R11
-	SHRQ  $0x32, R11
-	SHLQ  $0x20, R12
-	IMULQ DI, R12
-	SHRQ  $0x34, R12
-	LEAQ  1(SI), DI
-	LEAQ  1(R8), R13
-	MOVL  SI, 24(SP)(R9*4)
-	MOVL  R8, 24(SP)(R11*4)
-	MOVL  DI, 65560(SP)(R10*4)
-	MOVL  R13, 65560(SP)(R12*4)
-	LEAQ  1(R8)(SI*1), DI
-	SHRQ  $0x01, DI
-	ADDQ  $0x01, SI
-	SUBQ  $0x01, R8
+	MOVQ  $0x0000cf1bbcdcbf9b, SI
+	MOVQ  $0x9e3779b1, R8
+	LEAQ  1(DI), DI
+	LEAQ  -2(DX), R9
+	MOVQ  (BX)(DI*1), R10
+	MOVQ  1(BX)(DI*1), R11
+	MOVQ  (BX)(R9*1), R12
+	MOVQ  1(BX)(R9*1), R13
+	SHLQ  $0x10, R10
+	IMULQ SI, R10
+	SHRQ  $0x32, R10
+	SHLQ  $0x20, R11
+	IMULQ R8, R11
+	SHRQ  $0x34, R11
+	SHLQ  $0x10, R12
+	IMULQ SI, R12
+	SHRQ  $0x32, R12
+	SHLQ  $0x20, R13
+	IMULQ R8, R13
+	SHRQ  $0x34, R13
+	LEAQ  1(DI), R8
+	LEAQ  1(R9), R14
+	MOVL  DI, (AX)(R10*4)
+	MOVL  R9, (AX)(R12*4)
+	MOVL  R8, 65536(AX)(R11*4)
+	MOVL  R14, 65536(AX)(R13*4)
+	LEAQ  1(R9)(DI*1), R8
+	SHRQ  $0x01, R8
+	ADDQ  $0x01, DI
+	SUBQ  $0x01, R9
 
 index_loop_encodeSnappyBetterBlockAsm12B:
-	CMPQ  DI, R8
+	CMPQ  R8, R9
 	JAE   search_loop_encodeSnappyBetterBlockAsm12B
-	MOVQ  (DX)(SI*1), R9
-	MOVQ  (DX)(DI*1), R10
-	SHLQ  $0x10, R9
-	IMULQ BX, R9
-	SHRQ  $0x32, R9
+	MOVQ  (BX)(DI*1), R10
+	MOVQ  (BX)(R8*1), R11
 	SHLQ  $0x10, R10
-	IMULQ BX, R10
+	IMULQ SI, R10
 	SHRQ  $0x32, R10
-	MOVL  SI, 24(SP)(R9*4)
-	MOVL  DI, 24(SP)(R10*4)
-	ADDQ  $0x02, SI
+	SHLQ  $0x10, R11
+	IMULQ SI, R11
+	SHRQ  $0x32, R11
+	MOVL  DI, (AX)(R10*4)
+	MOVL  R8, (AX)(R11*4)
 	ADDQ  $0x02, DI
+	ADDQ  $0x02, R8
 	JMP   index_loop_encodeSnappyBetterBlockAsm12B
 
 emit_remainder_encodeSnappyBetterBlockAsm12B:
-	MOVQ src_len+32(FP), CX
-	SUBL 12(SP), CX
-	LEAQ 3(AX)(CX*1), CX
-	CMPQ CX, (SP)
+	MOVQ src_len+32(FP), AX
+	SUBL 12(SP), AX
+	LEAQ 3(CX)(AX*1), AX
+	CMPQ AX, (SP)
 	JB   emit_remainder_ok_encodeSnappyBetterBlockAsm12B
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 emit_remainder_ok_encodeSnappyBetterBlockAsm12B:
-	MOVQ src_len+32(FP), CX
-	MOVL 12(SP), BX
-	CMPL BX, CX
+	MOVQ src_len+32(FP), AX
+	MOVL 12(SP), DX
+	CMPL DX, AX
 	JEQ  emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm12B
-	MOVL CX, SI
-	MOVL CX, 12(SP)
-	LEAQ (DX)(BX*1), CX
-	SUBL BX, SI
+	MOVL AX, SI
+	MOVL AX, 12(SP)
+	LEAQ (BX)(DX*1), AX
+	SUBL DX, SI
 	LEAL -1(SI), DX
 	CMPL DX, $0x3c
 	JB   one_byte_emit_remainder_encodeSnappyBetterBlockAsm12B
@@ -16660,26 +16678,26 @@ emit_remainder_ok_encodeSnappyBetterBlockAsm12B:
 	JB   three_bytes_emit_remainder_encodeSnappyBetterBlockAsm12B
 
 three_bytes_emit_remainder_encodeSnappyBetterBlockAsm12B:
-	MOVB $0xf4, (AX)
-	MOVW DX, 1(AX)
-	ADDQ $0x03, AX
+	MOVB $0xf4, (CX)
+	MOVW DX, 1(CX)
+	ADDQ $0x03, CX
 	JMP  memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12B
 
 two_bytes_emit_remainder_encodeSnappyBetterBlockAsm12B:
-	MOVB $0xf0, (AX)
-	MOVB DL, 1(AX)
-	ADDQ $0x02, AX
+	MOVB $0xf0, (CX)
+	MOVB DL, 1(CX)
+	ADDQ $0x02, CX
 	CMPL DX, $0x40
 	JB   memmove_emit_remainder_encodeSnappyBetterBlockAsm12B
 	JMP  memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12B
 
 one_byte_emit_remainder_encodeSnappyBetterBlockAsm12B:
 	SHLB $0x02, DL
-	MOVB DL, (AX)
-	ADDQ $0x01, AX
+	MOVB DL, (CX)
+	ADDQ $0x01, CX
 
 memmove_emit_remainder_encodeSnappyBetterBlockAsm12B:
-	LEAQ (AX)(SI*1), DX
+	LEAQ (CX)(SI*1), DX
 	MOVL SI, BX
 
 	// genMemMoveShort
@@ -16695,73 +16713,73 @@ memmove_emit_remainder_encodeSnappyBetterBlockAsm12B:
 	JMP  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_33through64
 
 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_1or2:
-	MOVB (CX), SI
-	MOVB -1(CX)(BX*1), CL
-	MOVB SI, (AX)
-	MOVB CL, -1(AX)(BX*1)
+	MOVB (AX), SI
+	MOVB -1(AX)(BX*1), AL
+	MOVB SI, (CX)
+	MOVB AL, -1(CX)(BX*1)
 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B
 
 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_3:
-	MOVW (CX), SI
-	MOVB 2(CX), CL
-	MOVW SI, (AX)
-	MOVB CL, 2(AX)
+	MOVW (AX), SI
+	MOVB 2(AX), AL
+	MOVW SI, (CX)
+	MOVB AL, 2(CX)
 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B
 
 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_4through7:
-	MOVL (CX), SI
-	MOVL -4(CX)(BX*1), CX
-	MOVL SI, (AX)
-	MOVL CX, -4(AX)(BX*1)
+	MOVL (AX), SI
+	MOVL -4(AX)(BX*1), AX
+	MOVL SI, (CX)
+	MOVL AX, -4(CX)(BX*1)
 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B
 
 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_8through16:
-	MOVQ (CX), SI
-	MOVQ -8(CX)(BX*1), CX
-	MOVQ SI, (AX)
-	MOVQ CX, -8(AX)(BX*1)
+	MOVQ (AX), SI
+	MOVQ -8(AX)(BX*1), AX
+	MOVQ SI, (CX)
+	MOVQ AX, -8(CX)(BX*1)
 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B
 
 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_17through32:
-	MOVOU (CX), X0
-	MOVOU -16(CX)(BX*1), X1
-	MOVOU X0, (AX)
-	MOVOU X1, -16(AX)(BX*1)
-	JMP   memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B
-
-emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_33through64:
-	MOVOU (CX), X0
-	MOVOU 16(CX), X1
-	MOVOU -32(CX)(BX*1), X2
-	MOVOU -16(CX)(BX*1), X3
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(BX*1)
-	MOVOU X3, -16(AX)(BX*1)
+	MOVOU (AX), X0
+	MOVOU -16(AX)(BX*1), X1
+	MOVOU X0, (CX)
+	MOVOU X1, -16(CX)(BX*1)
+	JMP   memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B
+
+emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm12B_memmove_move_33through64:
+	MOVOU (AX), X0
+	MOVOU 16(AX), X1
+	MOVOU -32(AX)(BX*1), X2
+	MOVOU -16(AX)(BX*1), X3
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(BX*1)
+	MOVOU X3, -16(CX)(BX*1)
 
 memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm12B:
-	MOVQ DX, AX
+	MOVQ DX, CX
 	JMP  emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm12B
 
 memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12B:
-	LEAQ (AX)(SI*1), DX
+	LEAQ (CX)(SI*1), DX
 	MOVL SI, BX
 
 	// genMemMoveLong
-	MOVOU (CX), X0
-	MOVOU 16(CX), X1
-	MOVOU -32(CX)(BX*1), X2
-	MOVOU -16(CX)(BX*1), X3
+	MOVOU (AX), X0
+	MOVOU 16(AX), X1
+	MOVOU -32(AX)(BX*1), X2
+	MOVOU -16(AX)(BX*1), X3
 	MOVQ  BX, DI
 	SHRQ  $0x05, DI
-	MOVQ  AX, SI
+	MOVQ  CX, SI
 	ANDL  $0x0000001f, SI
 	MOVQ  $0x00000040, R8
 	SUBQ  SI, R8
 	DECQ  DI
 	JA    emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32
-	LEAQ  -32(CX)(R8*1), SI
-	LEAQ  -32(AX)(R8*1), R9
+	LEAQ  -32(AX)(R8*1), SI
+	LEAQ  -32(CX)(R8*1), R9
 
 emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_big_loop_back:
 	MOVOU (SI), X4
@@ -16775,463 +16793,464 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_big_loop
 	JNA   emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_big_loop_back
 
 emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32:
-	MOVOU -32(CX)(R8*1), X4
-	MOVOU -16(CX)(R8*1), X5
-	MOVOA X4, -32(AX)(R8*1)
-	MOVOA X5, -16(AX)(R8*1)
+	MOVOU -32(AX)(R8*1), X4
+	MOVOU -16(AX)(R8*1), X5
+	MOVOA X4, -32(CX)(R8*1)
+	MOVOA X5, -16(CX)(R8*1)
 	ADDQ  $0x20, R8
 	CMPQ  BX, R8
 	JAE   emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm12Blarge_forward_sse_loop_32
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(BX*1)
-	MOVOU X3, -16(AX)(BX*1)
-	MOVQ  DX, AX
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(BX*1)
+	MOVOU X3, -16(CX)(BX*1)
+	MOVQ  DX, CX
 
 emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm12B:
-	MOVQ dst_base+0(FP), CX
-	SUBQ CX, AX
-	MOVQ AX, ret+48(FP)
+	MOVQ dst_base+0(FP), AX
+	SUBQ AX, CX
+	MOVQ CX, ret+56(FP)
 	RET
 
-// func encodeSnappyBetterBlockAsm10B(dst []byte, src []byte) int
+// func encodeSnappyBetterBlockAsm10B(dst []byte, src []byte, tmp *[20480]byte) int
 // Requires: BMI, SSE2
-TEXT ·encodeSnappyBetterBlockAsm10B(SB), $20504-56
-	MOVQ dst_base+0(FP), AX
-	MOVQ $0x000000a0, CX
-	LEAQ 24(SP), DX
+TEXT ·encodeSnappyBetterBlockAsm10B(SB), $24-64
+	MOVQ tmp+48(FP), AX
+	MOVQ dst_base+0(FP), CX
+	MOVQ $0x000000a0, DX
+	MOVQ AX, BX
 	PXOR X0, X0
 
 zero_loop_encodeSnappyBetterBlockAsm10B:
-	MOVOU X0, (DX)
-	MOVOU X0, 16(DX)
-	MOVOU X0, 32(DX)
-	MOVOU X0, 48(DX)
-	MOVOU X0, 64(DX)
-	MOVOU X0, 80(DX)
-	MOVOU X0, 96(DX)
-	MOVOU X0, 112(DX)
-	ADDQ  $0x80, DX
-	DECQ  CX
+	MOVOU X0, (BX)
+	MOVOU X0, 16(BX)
+	MOVOU X0, 32(BX)
+	MOVOU X0, 48(BX)
+	MOVOU X0, 64(BX)
+	MOVOU X0, 80(BX)
+	MOVOU X0, 96(BX)
+	MOVOU X0, 112(BX)
+	ADDQ  $0x80, BX
+	DECQ  DX
 	JNZ   zero_loop_encodeSnappyBetterBlockAsm10B
 	MOVL  $0x00000000, 12(SP)
-	MOVQ  src_len+32(FP), CX
-	LEAQ  -9(CX), DX
-	LEAQ  -8(CX), BX
-	MOVL  BX, 8(SP)
-	SHRQ  $0x05, CX
-	SUBL  CX, DX
-	LEAQ  (AX)(DX*1), DX
-	MOVQ  DX, (SP)
-	MOVL  $0x00000001, CX
+	MOVQ  src_len+32(FP), DX
+	LEAQ  -9(DX), BX
+	LEAQ  -8(DX), SI
+	MOVL  SI, 8(SP)
+	SHRQ  $0x05, DX
+	SUBL  DX, BX
+	LEAQ  (CX)(BX*1), BX
+	MOVQ  BX, (SP)
+	MOVL  $0x00000001, DX
 	MOVL  $0x00000000, 16(SP)
-	MOVQ  src_base+24(FP), DX
+	MOVQ  src_base+24(FP), BX
 
 search_loop_encodeSnappyBetterBlockAsm10B:
-	MOVL  CX, BX
-	SUBL  12(SP), BX
-	SHRL  $0x05, BX
-	LEAL  1(CX)(BX*1), BX
-	CMPL  BX, 8(SP)
+	MOVL  DX, SI
+	SUBL  12(SP), SI
+	SHRL  $0x05, SI
+	LEAL  1(DX)(SI*1), SI
+	CMPL  SI, 8(SP)
 	JAE   emit_remainder_encodeSnappyBetterBlockAsm10B
-	MOVQ  (DX)(CX*1), SI
-	MOVL  BX, 20(SP)
-	MOVQ  $0x0000cf1bbcdcbf9b, R8
-	MOVQ  $0x9e3779b1, BX
-	MOVQ  SI, R9
-	MOVQ  SI, R10
-	SHLQ  $0x10, R9
-	IMULQ R8, R9
-	SHRQ  $0x34, R9
-	SHLQ  $0x20, R10
-	IMULQ BX, R10
-	SHRQ  $0x36, R10
-	MOVL  24(SP)(R9*4), BX
-	MOVL  16408(SP)(R10*4), DI
-	MOVL  CX, 24(SP)(R9*4)
-	MOVL  CX, 16408(SP)(R10*4)
-	MOVQ  (DX)(BX*1), R9
-	MOVQ  (DX)(DI*1), R10
-	CMPQ  R9, SI
+	MOVQ  (BX)(DX*1), DI
+	MOVL  SI, 20(SP)
+	MOVQ  $0x0000cf1bbcdcbf9b, R9
+	MOVQ  $0x9e3779b1, SI
+	MOVQ  DI, R10
+	MOVQ  DI, R11
+	SHLQ  $0x10, R10
+	IMULQ R9, R10
+	SHRQ  $0x34, R10
+	SHLQ  $0x20, R11
+	IMULQ SI, R11
+	SHRQ  $0x36, R11
+	MOVL  (AX)(R10*4), SI
+	MOVL  16384(AX)(R11*4), R8
+	MOVL  DX, (AX)(R10*4)
+	MOVL  DX, 16384(AX)(R11*4)
+	MOVQ  (BX)(SI*1), R10
+	MOVQ  (BX)(R8*1), R11
+	CMPQ  R10, DI
 	JEQ   candidate_match_encodeSnappyBetterBlockAsm10B
-	CMPQ  R10, SI
+	CMPQ  R11, DI
 	JNE   no_short_found_encodeSnappyBetterBlockAsm10B
-	MOVL  DI, BX
+	MOVL  R8, SI
 	JMP   candidate_match_encodeSnappyBetterBlockAsm10B
 
 no_short_found_encodeSnappyBetterBlockAsm10B:
-	CMPL R9, SI
+	CMPL R10, DI
 	JEQ  candidate_match_encodeSnappyBetterBlockAsm10B
-	CMPL R10, SI
+	CMPL R11, DI
 	JEQ  candidateS_match_encodeSnappyBetterBlockAsm10B
-	MOVL 20(SP), CX
+	MOVL 20(SP), DX
 	JMP  search_loop_encodeSnappyBetterBlockAsm10B
 
 candidateS_match_encodeSnappyBetterBlockAsm10B:
-	SHRQ  $0x08, SI
-	MOVQ  SI, R9
-	SHLQ  $0x10, R9
-	IMULQ R8, R9
-	SHRQ  $0x34, R9
-	MOVL  24(SP)(R9*4), BX
-	INCL  CX
-	MOVL  CX, 24(SP)(R9*4)
-	CMPL  (DX)(BX*1), SI
+	SHRQ  $0x08, DI
+	MOVQ  DI, R10
+	SHLQ  $0x10, R10
+	IMULQ R9, R10
+	SHRQ  $0x34, R10
+	MOVL  (AX)(R10*4), SI
+	INCL  DX
+	MOVL  DX, (AX)(R10*4)
+	CMPL  (BX)(SI*1), DI
 	JEQ   candidate_match_encodeSnappyBetterBlockAsm10B
-	DECL  CX
-	MOVL  DI, BX
+	DECL  DX
+	MOVL  R8, SI
 
 candidate_match_encodeSnappyBetterBlockAsm10B:
-	MOVL  12(SP), SI
-	TESTL BX, BX
+	MOVL  12(SP), DI
+	TESTL SI, SI
 	JZ    match_extend_back_end_encodeSnappyBetterBlockAsm10B
 
 match_extend_back_loop_encodeSnappyBetterBlockAsm10B:
-	CMPL CX, SI
+	CMPL DX, DI
 	JBE  match_extend_back_end_encodeSnappyBetterBlockAsm10B
-	MOVB -1(DX)(BX*1), DI
-	MOVB -1(DX)(CX*1), R8
-	CMPB DI, R8
+	MOVB -1(BX)(SI*1), R8
+	MOVB -1(BX)(DX*1), R9
+	CMPB R8, R9
 	JNE  match_extend_back_end_encodeSnappyBetterBlockAsm10B
-	LEAL -1(CX), CX
-	DECL BX
+	LEAL -1(DX), DX
+	DECL SI
 	JZ   match_extend_back_end_encodeSnappyBetterBlockAsm10B
 	JMP  match_extend_back_loop_encodeSnappyBetterBlockAsm10B
 
 match_extend_back_end_encodeSnappyBetterBlockAsm10B:
-	MOVL CX, SI
-	SUBL 12(SP), SI
-	LEAQ 3(AX)(SI*1), SI
-	CMPQ SI, (SP)
+	MOVL DX, DI
+	SUBL 12(SP), DI
+	LEAQ 3(CX)(DI*1), DI
+	CMPQ DI, (SP)
 	JB   match_dst_size_check_encodeSnappyBetterBlockAsm10B
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 match_dst_size_check_encodeSnappyBetterBlockAsm10B:
-	MOVL CX, SI
-	ADDL $0x04, CX
-	ADDL $0x04, BX
-	MOVQ src_len+32(FP), DI
-	SUBL CX, DI
-	LEAQ (DX)(CX*1), R8
-	LEAQ (DX)(BX*1), R9
+	MOVL DX, DI
+	ADDL $0x04, DX
+	ADDL $0x04, SI
+	MOVQ src_len+32(FP), R8
+	SUBL DX, R8
+	LEAQ (BX)(DX*1), R9
+	LEAQ (BX)(SI*1), R10
 
 	// matchLen
-	XORL R11, R11
+	XORL R12, R12
 
 matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm10B:
-	CMPL DI, $0x10
+	CMPL R8, $0x10
 	JB   matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm10B
-	MOVQ (R8)(R11*1), R10
-	MOVQ 8(R8)(R11*1), R12
-	XORQ (R9)(R11*1), R10
+	MOVQ (R9)(R12*1), R11
+	MOVQ 8(R9)(R12*1), R13
+	XORQ (R10)(R12*1), R11
 	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm10B
-	XORQ 8(R9)(R11*1), R12
+	XORQ 8(R10)(R12*1), R13
 	JNZ  matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm10B
-	LEAL -16(DI), DI
-	LEAL 16(R11), R11
+	LEAL -16(R8), R8
+	LEAL 16(R12), R12
 	JMP  matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm10B
 
 matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm10B:
 #ifdef GOAMD64_v3
-	TZCNTQ R12, R12
+	TZCNTQ R13, R13
 
 #else
-	BSFQ R12, R12
+	BSFQ R13, R13
 
 #endif
-	SARQ $0x03, R12
-	LEAL 8(R11)(R12*1), R11
+	SARQ $0x03, R13
+	LEAL 8(R12)(R13*1), R12
 	JMP  match_nolit_end_encodeSnappyBetterBlockAsm10B
 
 matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm10B:
-	CMPL DI, $0x08
+	CMPL R8, $0x08
 	JB   matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm10B
-	MOVQ (R8)(R11*1), R10
-	XORQ (R9)(R11*1), R10
+	MOVQ (R9)(R12*1), R11
+	XORQ (R10)(R12*1), R11
 	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm10B
-	LEAL -8(DI), DI
-	LEAL 8(R11), R11
+	LEAL -8(R8), R8
+	LEAL 8(R12), R12
 	JMP  matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm10B
 
 matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm10B:
 #ifdef GOAMD64_v3
-	TZCNTQ R10, R10
+	TZCNTQ R11, R11
 
 #else
-	BSFQ R10, R10
+	BSFQ R11, R11
 
 #endif
-	SARQ $0x03, R10
-	LEAL (R11)(R10*1), R11
+	SARQ $0x03, R11
+	LEAL (R12)(R11*1), R12
 	JMP  match_nolit_end_encodeSnappyBetterBlockAsm10B
 
 matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm10B:
-	CMPL DI, $0x04
+	CMPL R8, $0x04
 	JB   matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm10B
-	MOVL (R8)(R11*1), R10
-	CMPL (R9)(R11*1), R10
+	MOVL (R9)(R12*1), R11
+	CMPL (R10)(R12*1), R11
 	JNE  matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm10B
-	LEAL -4(DI), DI
-	LEAL 4(R11), R11
+	LEAL -4(R8), R8
+	LEAL 4(R12), R12
 
 matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm10B:
-	CMPL DI, $0x01
+	CMPL R8, $0x01
 	JE   matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm10B
 	JB   match_nolit_end_encodeSnappyBetterBlockAsm10B
-	MOVW (R8)(R11*1), R10
-	CMPW (R9)(R11*1), R10
+	MOVW (R9)(R12*1), R11
+	CMPW (R10)(R12*1), R11
 	JNE  matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm10B
-	LEAL 2(R11), R11
-	SUBL $0x02, DI
+	LEAL 2(R12), R12
+	SUBL $0x02, R8
 	JZ   match_nolit_end_encodeSnappyBetterBlockAsm10B
 
 matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm10B:
-	MOVB (R8)(R11*1), R10
-	CMPB (R9)(R11*1), R10
+	MOVB (R9)(R12*1), R11
+	CMPB (R10)(R12*1), R11
 	JNE  match_nolit_end_encodeSnappyBetterBlockAsm10B
-	LEAL 1(R11), R11
+	LEAL 1(R12), R12
 
 match_nolit_end_encodeSnappyBetterBlockAsm10B:
-	MOVL CX, DI
-	SUBL BX, DI
+	MOVL DX, R8
+	SUBL SI, R8
 
 	// Check if repeat
-	MOVL DI, 16(SP)
-	MOVL 12(SP), BX
-	CMPL BX, SI
+	MOVL R8, 16(SP)
+	MOVL 12(SP), SI
+	CMPL SI, DI
 	JEQ  emit_literal_done_match_emit_encodeSnappyBetterBlockAsm10B
-	MOVL SI, R8
-	MOVL SI, 12(SP)
-	LEAQ (DX)(BX*1), R9
-	SUBL BX, R8
-	LEAL -1(R8), BX
-	CMPL BX, $0x3c
+	MOVL DI, R9
+	MOVL DI, 12(SP)
+	LEAQ (BX)(SI*1), R10
+	SUBL SI, R9
+	LEAL -1(R9), SI
+	CMPL SI, $0x3c
 	JB   one_byte_match_emit_encodeSnappyBetterBlockAsm10B
-	CMPL BX, $0x00000100
+	CMPL SI, $0x00000100
 	JB   two_bytes_match_emit_encodeSnappyBetterBlockAsm10B
 	JB   three_bytes_match_emit_encodeSnappyBetterBlockAsm10B
 
 three_bytes_match_emit_encodeSnappyBetterBlockAsm10B:
-	MOVB $0xf4, (AX)
-	MOVW BX, 1(AX)
-	ADDQ $0x03, AX
+	MOVB $0xf4, (CX)
+	MOVW SI, 1(CX)
+	ADDQ $0x03, CX
 	JMP  memmove_long_match_emit_encodeSnappyBetterBlockAsm10B
 
 two_bytes_match_emit_encodeSnappyBetterBlockAsm10B:
-	MOVB $0xf0, (AX)
-	MOVB BL, 1(AX)
-	ADDQ $0x02, AX
-	CMPL BX, $0x40
+	MOVB $0xf0, (CX)
+	MOVB SI, 1(CX)
+	ADDQ $0x02, CX
+	CMPL SI, $0x40
 	JB   memmove_match_emit_encodeSnappyBetterBlockAsm10B
 	JMP  memmove_long_match_emit_encodeSnappyBetterBlockAsm10B
 
 one_byte_match_emit_encodeSnappyBetterBlockAsm10B:
-	SHLB $0x02, BL
-	MOVB BL, (AX)
-	ADDQ $0x01, AX
+	SHLB $0x02, SI
+	MOVB SI, (CX)
+	ADDQ $0x01, CX
 
 memmove_match_emit_encodeSnappyBetterBlockAsm10B:
-	LEAQ (AX)(R8*1), BX
+	LEAQ (CX)(R9*1), SI
 
 	// genMemMoveShort
-	CMPQ R8, $0x08
+	CMPQ R9, $0x08
 	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8
-	CMPQ R8, $0x10
+	CMPQ R9, $0x10
 	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8through16
-	CMPQ R8, $0x20
+	CMPQ R9, $0x20
 	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_17through32
 	JMP  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_33through64
 
 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8:
-	MOVQ (R9), R10
-	MOVQ R10, (AX)
+	MOVQ (R10), R11
+	MOVQ R11, (CX)
 	JMP  memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B
 
 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_8through16:
-	MOVQ (R9), R10
-	MOVQ -8(R9)(R8*1), R9
-	MOVQ R10, (AX)
-	MOVQ R9, -8(AX)(R8*1)
+	MOVQ (R10), R11
+	MOVQ -8(R10)(R9*1), R10
+	MOVQ R11, (CX)
+	MOVQ R10, -8(CX)(R9*1)
 	JMP  memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B
 
 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_17through32:
-	MOVOU (R9), X0
-	MOVOU -16(R9)(R8*1), X1
-	MOVOU X0, (AX)
-	MOVOU X1, -16(AX)(R8*1)
+	MOVOU (R10), X0
+	MOVOU -16(R10)(R9*1), X1
+	MOVOU X0, (CX)
+	MOVOU X1, -16(CX)(R9*1)
 	JMP   memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B
 
 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm10B_memmove_move_33through64:
-	MOVOU (R9), X0
-	MOVOU 16(R9), X1
-	MOVOU -32(R9)(R8*1), X2
-	MOVOU -16(R9)(R8*1), X3
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(R8*1)
-	MOVOU X3, -16(AX)(R8*1)
+	MOVOU (R10), X0
+	MOVOU 16(R10), X1
+	MOVOU -32(R10)(R9*1), X2
+	MOVOU -16(R10)(R9*1), X3
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R9*1)
+	MOVOU X3, -16(CX)(R9*1)
 
 memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm10B:
-	MOVQ BX, AX
+	MOVQ SI, CX
 	JMP  emit_literal_done_match_emit_encodeSnappyBetterBlockAsm10B
 
 memmove_long_match_emit_encodeSnappyBetterBlockAsm10B:
-	LEAQ (AX)(R8*1), BX
+	LEAQ (CX)(R9*1), SI
 
 	// genMemMoveLong
-	MOVOU (R9), X0
-	MOVOU 16(R9), X1
-	MOVOU -32(R9)(R8*1), X2
-	MOVOU -16(R9)(R8*1), X3
-	MOVQ  R8, R12
-	SHRQ  $0x05, R12
-	MOVQ  AX, R10
-	ANDL  $0x0000001f, R10
-	MOVQ  $0x00000040, R13
-	SUBQ  R10, R13
-	DECQ  R12
+	MOVOU (R10), X0
+	MOVOU 16(R10), X1
+	MOVOU -32(R10)(R9*1), X2
+	MOVOU -16(R10)(R9*1), X3
+	MOVQ  R9, R13
+	SHRQ  $0x05, R13
+	MOVQ  CX, R11
+	ANDL  $0x0000001f, R11
+	MOVQ  $0x00000040, R14
+	SUBQ  R11, R14
+	DECQ  R13
 	JA    emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32
-	LEAQ  -32(R9)(R13*1), R10
-	LEAQ  -32(AX)(R13*1), R14
+	LEAQ  -32(R10)(R14*1), R11
+	LEAQ  -32(CX)(R14*1), R15
 
 emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_big_loop_back:
-	MOVOU (R10), X4
-	MOVOU 16(R10), X5
-	MOVOA X4, (R14)
-	MOVOA X5, 16(R14)
+	MOVOU (R11), X4
+	MOVOU 16(R11), X5
+	MOVOA X4, (R15)
+	MOVOA X5, 16(R15)
+	ADDQ  $0x20, R15
+	ADDQ  $0x20, R11
 	ADDQ  $0x20, R14
-	ADDQ  $0x20, R10
-	ADDQ  $0x20, R13
-	DECQ  R12
+	DECQ  R13
 	JNA   emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_big_loop_back
 
 emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32:
-	MOVOU -32(R9)(R13*1), X4
-	MOVOU -16(R9)(R13*1), X5
-	MOVOA X4, -32(AX)(R13*1)
-	MOVOA X5, -16(AX)(R13*1)
-	ADDQ  $0x20, R13
-	CMPQ  R8, R13
+	MOVOU -32(R10)(R14*1), X4
+	MOVOU -16(R10)(R14*1), X5
+	MOVOA X4, -32(CX)(R14*1)
+	MOVOA X5, -16(CX)(R14*1)
+	ADDQ  $0x20, R14
+	CMPQ  R9, R14
 	JAE   emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(R8*1)
-	MOVOU X3, -16(AX)(R8*1)
-	MOVQ  BX, AX
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R9*1)
+	MOVOU X3, -16(CX)(R9*1)
+	MOVQ  SI, CX
 
 emit_literal_done_match_emit_encodeSnappyBetterBlockAsm10B:
-	ADDL R11, CX
-	ADDL $0x04, R11
-	MOVL CX, 12(SP)
+	ADDL R12, DX
+	ADDL $0x04, R12
+	MOVL DX, 12(SP)
 
 	// emitCopy
 two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm10B:
-	CMPL R11, $0x40
+	CMPL R12, $0x40
 	JBE  two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm10B
-	MOVB $0xee, (AX)
-	MOVW DI, 1(AX)
-	LEAL -60(R11), R11
-	ADDQ $0x03, AX
+	MOVB $0xee, (CX)
+	MOVW R8, 1(CX)
+	LEAL -60(R12), R12
+	ADDQ $0x03, CX
 	JMP  two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm10B
 
 two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm10B:
-	MOVL R11, BX
-	SHLL $0x02, BX
-	CMPL R11, $0x0c
+	MOVL R12, SI
+	SHLL $0x02, SI
+	CMPL R12, $0x0c
 	JAE  emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm10B
-	CMPL DI, $0x00000800
+	CMPL R8, $0x00000800
 	JAE  emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm10B
-	LEAL -15(BX), BX
-	MOVB DI, 1(AX)
-	SHRL $0x08, DI
-	SHLL $0x05, DI
-	ORL  DI, BX
-	MOVB BL, (AX)
-	ADDQ $0x02, AX
+	LEAL -15(SI), SI
+	MOVB R8, 1(CX)
+	SHRL $0x08, R8
+	SHLL $0x05, R8
+	ORL  R8, SI
+	MOVB SI, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm10B
 
 emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm10B:
-	LEAL -2(BX), BX
-	MOVB BL, (AX)
-	MOVW DI, 1(AX)
-	ADDQ $0x03, AX
+	LEAL -2(SI), SI
+	MOVB SI, (CX)
+	MOVW R8, 1(CX)
+	ADDQ $0x03, CX
 
 match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm10B:
-	CMPL CX, 8(SP)
+	CMPL DX, 8(SP)
 	JAE  emit_remainder_encodeSnappyBetterBlockAsm10B
-	CMPQ AX, (SP)
+	CMPQ CX, (SP)
 	JB   match_nolit_dst_ok_encodeSnappyBetterBlockAsm10B
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 match_nolit_dst_ok_encodeSnappyBetterBlockAsm10B:
-	MOVQ  $0x0000cf1bbcdcbf9b, BX
-	MOVQ  $0x9e3779b1, DI
-	LEAQ  1(SI), SI
-	LEAQ  -2(CX), R8
-	MOVQ  (DX)(SI*1), R9
-	MOVQ  1(DX)(SI*1), R10
-	MOVQ  (DX)(R8*1), R11
-	MOVQ  1(DX)(R8*1), R12
-	SHLQ  $0x10, R9
-	IMULQ BX, R9
-	SHRQ  $0x34, R9
-	SHLQ  $0x20, R10
-	IMULQ DI, R10
-	SHRQ  $0x36, R10
-	SHLQ  $0x10, R11
-	IMULQ BX, R11
-	SHRQ  $0x34, R11
-	SHLQ  $0x20, R12
-	IMULQ DI, R12
-	SHRQ  $0x36, R12
-	LEAQ  1(SI), DI
-	LEAQ  1(R8), R13
-	MOVL  SI, 24(SP)(R9*4)
-	MOVL  R8, 24(SP)(R11*4)
-	MOVL  DI, 16408(SP)(R10*4)
-	MOVL  R13, 16408(SP)(R12*4)
-	LEAQ  1(R8)(SI*1), DI
-	SHRQ  $0x01, DI
-	ADDQ  $0x01, SI
-	SUBQ  $0x01, R8
+	MOVQ  $0x0000cf1bbcdcbf9b, SI
+	MOVQ  $0x9e3779b1, R8
+	LEAQ  1(DI), DI
+	LEAQ  -2(DX), R9
+	MOVQ  (BX)(DI*1), R10
+	MOVQ  1(BX)(DI*1), R11
+	MOVQ  (BX)(R9*1), R12
+	MOVQ  1(BX)(R9*1), R13
+	SHLQ  $0x10, R10
+	IMULQ SI, R10
+	SHRQ  $0x34, R10
+	SHLQ  $0x20, R11
+	IMULQ R8, R11
+	SHRQ  $0x36, R11
+	SHLQ  $0x10, R12
+	IMULQ SI, R12
+	SHRQ  $0x34, R12
+	SHLQ  $0x20, R13
+	IMULQ R8, R13
+	SHRQ  $0x36, R13
+	LEAQ  1(DI), R8
+	LEAQ  1(R9), R14
+	MOVL  DI, (AX)(R10*4)
+	MOVL  R9, (AX)(R12*4)
+	MOVL  R8, 16384(AX)(R11*4)
+	MOVL  R14, 16384(AX)(R13*4)
+	LEAQ  1(R9)(DI*1), R8
+	SHRQ  $0x01, R8
+	ADDQ  $0x01, DI
+	SUBQ  $0x01, R9
 
 index_loop_encodeSnappyBetterBlockAsm10B:
-	CMPQ  DI, R8
+	CMPQ  R8, R9
 	JAE   search_loop_encodeSnappyBetterBlockAsm10B
-	MOVQ  (DX)(SI*1), R9
-	MOVQ  (DX)(DI*1), R10
-	SHLQ  $0x10, R9
-	IMULQ BX, R9
-	SHRQ  $0x34, R9
+	MOVQ  (BX)(DI*1), R10
+	MOVQ  (BX)(R8*1), R11
 	SHLQ  $0x10, R10
-	IMULQ BX, R10
+	IMULQ SI, R10
 	SHRQ  $0x34, R10
-	MOVL  SI, 24(SP)(R9*4)
-	MOVL  DI, 24(SP)(R10*4)
-	ADDQ  $0x02, SI
+	SHLQ  $0x10, R11
+	IMULQ SI, R11
+	SHRQ  $0x34, R11
+	MOVL  DI, (AX)(R10*4)
+	MOVL  R8, (AX)(R11*4)
 	ADDQ  $0x02, DI
+	ADDQ  $0x02, R8
 	JMP   index_loop_encodeSnappyBetterBlockAsm10B
 
 emit_remainder_encodeSnappyBetterBlockAsm10B:
-	MOVQ src_len+32(FP), CX
-	SUBL 12(SP), CX
-	LEAQ 3(AX)(CX*1), CX
-	CMPQ CX, (SP)
+	MOVQ src_len+32(FP), AX
+	SUBL 12(SP), AX
+	LEAQ 3(CX)(AX*1), AX
+	CMPQ AX, (SP)
 	JB   emit_remainder_ok_encodeSnappyBetterBlockAsm10B
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 emit_remainder_ok_encodeSnappyBetterBlockAsm10B:
-	MOVQ src_len+32(FP), CX
-	MOVL 12(SP), BX
-	CMPL BX, CX
+	MOVQ src_len+32(FP), AX
+	MOVL 12(SP), DX
+	CMPL DX, AX
 	JEQ  emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm10B
-	MOVL CX, SI
-	MOVL CX, 12(SP)
-	LEAQ (DX)(BX*1), CX
-	SUBL BX, SI
+	MOVL AX, SI
+	MOVL AX, 12(SP)
+	LEAQ (BX)(DX*1), AX
+	SUBL DX, SI
 	LEAL -1(SI), DX
 	CMPL DX, $0x3c
 	JB   one_byte_emit_remainder_encodeSnappyBetterBlockAsm10B
@@ -17240,26 +17259,26 @@ emit_remainder_ok_encodeSnappyBetterBlockAsm10B:
 	JB   three_bytes_emit_remainder_encodeSnappyBetterBlockAsm10B
 
 three_bytes_emit_remainder_encodeSnappyBetterBlockAsm10B:
-	MOVB $0xf4, (AX)
-	MOVW DX, 1(AX)
-	ADDQ $0x03, AX
+	MOVB $0xf4, (CX)
+	MOVW DX, 1(CX)
+	ADDQ $0x03, CX
 	JMP  memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10B
 
 two_bytes_emit_remainder_encodeSnappyBetterBlockAsm10B:
-	MOVB $0xf0, (AX)
-	MOVB DL, 1(AX)
-	ADDQ $0x02, AX
+	MOVB $0xf0, (CX)
+	MOVB DL, 1(CX)
+	ADDQ $0x02, CX
 	CMPL DX, $0x40
 	JB   memmove_emit_remainder_encodeSnappyBetterBlockAsm10B
 	JMP  memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10B
 
 one_byte_emit_remainder_encodeSnappyBetterBlockAsm10B:
 	SHLB $0x02, DL
-	MOVB DL, (AX)
-	ADDQ $0x01, AX
+	MOVB DL, (CX)
+	ADDQ $0x01, CX
 
 memmove_emit_remainder_encodeSnappyBetterBlockAsm10B:
-	LEAQ (AX)(SI*1), DX
+	LEAQ (CX)(SI*1), DX
 	MOVL SI, BX
 
 	// genMemMoveShort
@@ -17275,73 +17294,73 @@ memmove_emit_remainder_encodeSnappyBetterBlockAsm10B:
 	JMP  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_33through64
 
 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_1or2:
-	MOVB (CX), SI
-	MOVB -1(CX)(BX*1), CL
-	MOVB SI, (AX)
-	MOVB CL, -1(AX)(BX*1)
+	MOVB (AX), SI
+	MOVB -1(AX)(BX*1), AL
+	MOVB SI, (CX)
+	MOVB AL, -1(CX)(BX*1)
 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B
 
 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_3:
-	MOVW (CX), SI
-	MOVB 2(CX), CL
-	MOVW SI, (AX)
-	MOVB CL, 2(AX)
+	MOVW (AX), SI
+	MOVB 2(AX), AL
+	MOVW SI, (CX)
+	MOVB AL, 2(CX)
 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B
 
 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_4through7:
-	MOVL (CX), SI
-	MOVL -4(CX)(BX*1), CX
-	MOVL SI, (AX)
-	MOVL CX, -4(AX)(BX*1)
+	MOVL (AX), SI
+	MOVL -4(AX)(BX*1), AX
+	MOVL SI, (CX)
+	MOVL AX, -4(CX)(BX*1)
 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B
 
 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_8through16:
-	MOVQ (CX), SI
-	MOVQ -8(CX)(BX*1), CX
-	MOVQ SI, (AX)
-	MOVQ CX, -8(AX)(BX*1)
+	MOVQ (AX), SI
+	MOVQ -8(AX)(BX*1), AX
+	MOVQ SI, (CX)
+	MOVQ AX, -8(CX)(BX*1)
 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B
 
 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_17through32:
-	MOVOU (CX), X0
-	MOVOU -16(CX)(BX*1), X1
-	MOVOU X0, (AX)
-	MOVOU X1, -16(AX)(BX*1)
+	MOVOU (AX), X0
+	MOVOU -16(AX)(BX*1), X1
+	MOVOU X0, (CX)
+	MOVOU X1, -16(CX)(BX*1)
 	JMP   memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B
 
 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm10B_memmove_move_33through64:
-	MOVOU (CX), X0
-	MOVOU 16(CX), X1
-	MOVOU -32(CX)(BX*1), X2
-	MOVOU -16(CX)(BX*1), X3
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(BX*1)
-	MOVOU X3, -16(AX)(BX*1)
+	MOVOU (AX), X0
+	MOVOU 16(AX), X1
+	MOVOU -32(AX)(BX*1), X2
+	MOVOU -16(AX)(BX*1), X3
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(BX*1)
+	MOVOU X3, -16(CX)(BX*1)
 
 memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm10B:
-	MOVQ DX, AX
+	MOVQ DX, CX
 	JMP  emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm10B
 
 memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10B:
-	LEAQ (AX)(SI*1), DX
+	LEAQ (CX)(SI*1), DX
 	MOVL SI, BX
 
 	// genMemMoveLong
-	MOVOU (CX), X0
-	MOVOU 16(CX), X1
-	MOVOU -32(CX)(BX*1), X2
-	MOVOU -16(CX)(BX*1), X3
+	MOVOU (AX), X0
+	MOVOU 16(AX), X1
+	MOVOU -32(AX)(BX*1), X2
+	MOVOU -16(AX)(BX*1), X3
 	MOVQ  BX, DI
 	SHRQ  $0x05, DI
-	MOVQ  AX, SI
+	MOVQ  CX, SI
 	ANDL  $0x0000001f, SI
 	MOVQ  $0x00000040, R8
 	SUBQ  SI, R8
 	DECQ  DI
 	JA    emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32
-	LEAQ  -32(CX)(R8*1), SI
-	LEAQ  -32(AX)(R8*1), R9
+	LEAQ  -32(AX)(R8*1), SI
+	LEAQ  -32(CX)(R8*1), R9
 
 emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_big_loop_back:
 	MOVOU (SI), X4
@@ -17355,461 +17374,462 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_big_loop
 	JNA   emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_big_loop_back
 
 emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32:
-	MOVOU -32(CX)(R8*1), X4
-	MOVOU -16(CX)(R8*1), X5
-	MOVOA X4, -32(AX)(R8*1)
-	MOVOA X5, -16(AX)(R8*1)
+	MOVOU -32(AX)(R8*1), X4
+	MOVOU -16(AX)(R8*1), X5
+	MOVOA X4, -32(CX)(R8*1)
+	MOVOA X5, -16(CX)(R8*1)
 	ADDQ  $0x20, R8
 	CMPQ  BX, R8
 	JAE   emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm10Blarge_forward_sse_loop_32
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(BX*1)
-	MOVOU X3, -16(AX)(BX*1)
-	MOVQ  DX, AX
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(BX*1)
+	MOVOU X3, -16(CX)(BX*1)
+	MOVQ  DX, CX
 
 emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm10B:
-	MOVQ dst_base+0(FP), CX
-	SUBQ CX, AX
-	MOVQ AX, ret+48(FP)
+	MOVQ dst_base+0(FP), AX
+	SUBQ AX, CX
+	MOVQ CX, ret+56(FP)
 	RET
 
-// func encodeSnappyBetterBlockAsm8B(dst []byte, src []byte) int
+// func encodeSnappyBetterBlockAsm8B(dst []byte, src []byte, tmp *[5120]byte) int
 // Requires: BMI, SSE2
-TEXT ·encodeSnappyBetterBlockAsm8B(SB), $5144-56
-	MOVQ dst_base+0(FP), AX
-	MOVQ $0x00000028, CX
-	LEAQ 24(SP), DX
+TEXT ·encodeSnappyBetterBlockAsm8B(SB), $24-64
+	MOVQ tmp+48(FP), AX
+	MOVQ dst_base+0(FP), CX
+	MOVQ $0x00000028, DX
+	MOVQ AX, BX
 	PXOR X0, X0
 
 zero_loop_encodeSnappyBetterBlockAsm8B:
-	MOVOU X0, (DX)
-	MOVOU X0, 16(DX)
-	MOVOU X0, 32(DX)
-	MOVOU X0, 48(DX)
-	MOVOU X0, 64(DX)
-	MOVOU X0, 80(DX)
-	MOVOU X0, 96(DX)
-	MOVOU X0, 112(DX)
-	ADDQ  $0x80, DX
-	DECQ  CX
+	MOVOU X0, (BX)
+	MOVOU X0, 16(BX)
+	MOVOU X0, 32(BX)
+	MOVOU X0, 48(BX)
+	MOVOU X0, 64(BX)
+	MOVOU X0, 80(BX)
+	MOVOU X0, 96(BX)
+	MOVOU X0, 112(BX)
+	ADDQ  $0x80, BX
+	DECQ  DX
 	JNZ   zero_loop_encodeSnappyBetterBlockAsm8B
 	MOVL  $0x00000000, 12(SP)
-	MOVQ  src_len+32(FP), CX
-	LEAQ  -9(CX), DX
-	LEAQ  -8(CX), BX
-	MOVL  BX, 8(SP)
-	SHRQ  $0x05, CX
-	SUBL  CX, DX
-	LEAQ  (AX)(DX*1), DX
-	MOVQ  DX, (SP)
-	MOVL  $0x00000001, CX
+	MOVQ  src_len+32(FP), DX
+	LEAQ  -9(DX), BX
+	LEAQ  -8(DX), SI
+	MOVL  SI, 8(SP)
+	SHRQ  $0x05, DX
+	SUBL  DX, BX
+	LEAQ  (CX)(BX*1), BX
+	MOVQ  BX, (SP)
+	MOVL  $0x00000001, DX
 	MOVL  $0x00000000, 16(SP)
-	MOVQ  src_base+24(FP), DX
+	MOVQ  src_base+24(FP), BX
 
 search_loop_encodeSnappyBetterBlockAsm8B:
-	MOVL  CX, BX
-	SUBL  12(SP), BX
-	SHRL  $0x04, BX
-	LEAL  1(CX)(BX*1), BX
-	CMPL  BX, 8(SP)
+	MOVL  DX, SI
+	SUBL  12(SP), SI
+	SHRL  $0x04, SI
+	LEAL  1(DX)(SI*1), SI
+	CMPL  SI, 8(SP)
 	JAE   emit_remainder_encodeSnappyBetterBlockAsm8B
-	MOVQ  (DX)(CX*1), SI
-	MOVL  BX, 20(SP)
-	MOVQ  $0x0000cf1bbcdcbf9b, R8
-	MOVQ  $0x9e3779b1, BX
-	MOVQ  SI, R9
-	MOVQ  SI, R10
-	SHLQ  $0x10, R9
-	IMULQ R8, R9
-	SHRQ  $0x36, R9
-	SHLQ  $0x20, R10
-	IMULQ BX, R10
-	SHRQ  $0x38, R10
-	MOVL  24(SP)(R9*4), BX
-	MOVL  4120(SP)(R10*4), DI
-	MOVL  CX, 24(SP)(R9*4)
-	MOVL  CX, 4120(SP)(R10*4)
-	MOVQ  (DX)(BX*1), R9
-	MOVQ  (DX)(DI*1), R10
-	CMPQ  R9, SI
+	MOVQ  (BX)(DX*1), DI
+	MOVL  SI, 20(SP)
+	MOVQ  $0x0000cf1bbcdcbf9b, R9
+	MOVQ  $0x9e3779b1, SI
+	MOVQ  DI, R10
+	MOVQ  DI, R11
+	SHLQ  $0x10, R10
+	IMULQ R9, R10
+	SHRQ  $0x36, R10
+	SHLQ  $0x20, R11
+	IMULQ SI, R11
+	SHRQ  $0x38, R11
+	MOVL  (AX)(R10*4), SI
+	MOVL  4096(AX)(R11*4), R8
+	MOVL  DX, (AX)(R10*4)
+	MOVL  DX, 4096(AX)(R11*4)
+	MOVQ  (BX)(SI*1), R10
+	MOVQ  (BX)(R8*1), R11
+	CMPQ  R10, DI
 	JEQ   candidate_match_encodeSnappyBetterBlockAsm8B
-	CMPQ  R10, SI
+	CMPQ  R11, DI
 	JNE   no_short_found_encodeSnappyBetterBlockAsm8B
-	MOVL  DI, BX
+	MOVL  R8, SI
 	JMP   candidate_match_encodeSnappyBetterBlockAsm8B
 
 no_short_found_encodeSnappyBetterBlockAsm8B:
-	CMPL R9, SI
+	CMPL R10, DI
 	JEQ  candidate_match_encodeSnappyBetterBlockAsm8B
-	CMPL R10, SI
+	CMPL R11, DI
 	JEQ  candidateS_match_encodeSnappyBetterBlockAsm8B
-	MOVL 20(SP), CX
+	MOVL 20(SP), DX
 	JMP  search_loop_encodeSnappyBetterBlockAsm8B
 
 candidateS_match_encodeSnappyBetterBlockAsm8B:
-	SHRQ  $0x08, SI
-	MOVQ  SI, R9
-	SHLQ  $0x10, R9
-	IMULQ R8, R9
-	SHRQ  $0x36, R9
-	MOVL  24(SP)(R9*4), BX
-	INCL  CX
-	MOVL  CX, 24(SP)(R9*4)
-	CMPL  (DX)(BX*1), SI
+	SHRQ  $0x08, DI
+	MOVQ  DI, R10
+	SHLQ  $0x10, R10
+	IMULQ R9, R10
+	SHRQ  $0x36, R10
+	MOVL  (AX)(R10*4), SI
+	INCL  DX
+	MOVL  DX, (AX)(R10*4)
+	CMPL  (BX)(SI*1), DI
 	JEQ   candidate_match_encodeSnappyBetterBlockAsm8B
-	DECL  CX
-	MOVL  DI, BX
+	DECL  DX
+	MOVL  R8, SI
 
 candidate_match_encodeSnappyBetterBlockAsm8B:
-	MOVL  12(SP), SI
-	TESTL BX, BX
+	MOVL  12(SP), DI
+	TESTL SI, SI
 	JZ    match_extend_back_end_encodeSnappyBetterBlockAsm8B
 
 match_extend_back_loop_encodeSnappyBetterBlockAsm8B:
-	CMPL CX, SI
+	CMPL DX, DI
 	JBE  match_extend_back_end_encodeSnappyBetterBlockAsm8B
-	MOVB -1(DX)(BX*1), DI
-	MOVB -1(DX)(CX*1), R8
-	CMPB DI, R8
+	MOVB -1(BX)(SI*1), R8
+	MOVB -1(BX)(DX*1), R9
+	CMPB R8, R9
 	JNE  match_extend_back_end_encodeSnappyBetterBlockAsm8B
-	LEAL -1(CX), CX
-	DECL BX
+	LEAL -1(DX), DX
+	DECL SI
 	JZ   match_extend_back_end_encodeSnappyBetterBlockAsm8B
 	JMP  match_extend_back_loop_encodeSnappyBetterBlockAsm8B
 
 match_extend_back_end_encodeSnappyBetterBlockAsm8B:
-	MOVL CX, SI
-	SUBL 12(SP), SI
-	LEAQ 3(AX)(SI*1), SI
-	CMPQ SI, (SP)
+	MOVL DX, DI
+	SUBL 12(SP), DI
+	LEAQ 3(CX)(DI*1), DI
+	CMPQ DI, (SP)
 	JB   match_dst_size_check_encodeSnappyBetterBlockAsm8B
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 match_dst_size_check_encodeSnappyBetterBlockAsm8B:
-	MOVL CX, SI
-	ADDL $0x04, CX
-	ADDL $0x04, BX
-	MOVQ src_len+32(FP), DI
-	SUBL CX, DI
-	LEAQ (DX)(CX*1), R8
-	LEAQ (DX)(BX*1), R9
+	MOVL DX, DI
+	ADDL $0x04, DX
+	ADDL $0x04, SI
+	MOVQ src_len+32(FP), R8
+	SUBL DX, R8
+	LEAQ (BX)(DX*1), R9
+	LEAQ (BX)(SI*1), R10
 
 	// matchLen
-	XORL R11, R11
+	XORL R12, R12
 
 matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm8B:
-	CMPL DI, $0x10
+	CMPL R8, $0x10
 	JB   matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm8B
-	MOVQ (R8)(R11*1), R10
-	MOVQ 8(R8)(R11*1), R12
-	XORQ (R9)(R11*1), R10
+	MOVQ (R9)(R12*1), R11
+	MOVQ 8(R9)(R12*1), R13
+	XORQ (R10)(R12*1), R11
 	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm8B
-	XORQ 8(R9)(R11*1), R12
+	XORQ 8(R10)(R12*1), R13
 	JNZ  matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm8B
-	LEAL -16(DI), DI
-	LEAL 16(R11), R11
+	LEAL -16(R8), R8
+	LEAL 16(R12), R12
 	JMP  matchlen_loopback_16_match_nolit_encodeSnappyBetterBlockAsm8B
 
 matchlen_bsf_16match_nolit_encodeSnappyBetterBlockAsm8B:
 #ifdef GOAMD64_v3
-	TZCNTQ R12, R12
+	TZCNTQ R13, R13
 
 #else
-	BSFQ R12, R12
+	BSFQ R13, R13
 
 #endif
-	SARQ $0x03, R12
-	LEAL 8(R11)(R12*1), R11
+	SARQ $0x03, R13
+	LEAL 8(R12)(R13*1), R12
 	JMP  match_nolit_end_encodeSnappyBetterBlockAsm8B
 
 matchlen_match8_match_nolit_encodeSnappyBetterBlockAsm8B:
-	CMPL DI, $0x08
+	CMPL R8, $0x08
 	JB   matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm8B
-	MOVQ (R8)(R11*1), R10
-	XORQ (R9)(R11*1), R10
+	MOVQ (R9)(R12*1), R11
+	XORQ (R10)(R12*1), R11
 	JNZ  matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm8B
-	LEAL -8(DI), DI
-	LEAL 8(R11), R11
+	LEAL -8(R8), R8
+	LEAL 8(R12), R12
 	JMP  matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm8B
 
 matchlen_bsf_8_match_nolit_encodeSnappyBetterBlockAsm8B:
 #ifdef GOAMD64_v3
-	TZCNTQ R10, R10
+	TZCNTQ R11, R11
 
 #else
-	BSFQ R10, R10
+	BSFQ R11, R11
 
 #endif
-	SARQ $0x03, R10
-	LEAL (R11)(R10*1), R11
+	SARQ $0x03, R11
+	LEAL (R12)(R11*1), R12
 	JMP  match_nolit_end_encodeSnappyBetterBlockAsm8B
 
 matchlen_match4_match_nolit_encodeSnappyBetterBlockAsm8B:
-	CMPL DI, $0x04
+	CMPL R8, $0x04
 	JB   matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm8B
-	MOVL (R8)(R11*1), R10
-	CMPL (R9)(R11*1), R10
-	JNE  matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm8B
-	LEAL -4(DI), DI
-	LEAL 4(R11), R11
+	MOVL (R9)(R12*1), R11
+	CMPL (R10)(R12*1), R11
+	JNE  matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm8B
+	LEAL -4(R8), R8
+	LEAL 4(R12), R12
 
 matchlen_match2_match_nolit_encodeSnappyBetterBlockAsm8B:
-	CMPL DI, $0x01
+	CMPL R8, $0x01
 	JE   matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm8B
 	JB   match_nolit_end_encodeSnappyBetterBlockAsm8B
-	MOVW (R8)(R11*1), R10
-	CMPW (R9)(R11*1), R10
+	MOVW (R9)(R12*1), R11
+	CMPW (R10)(R12*1), R11
 	JNE  matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm8B
-	LEAL 2(R11), R11
-	SUBL $0x02, DI
+	LEAL 2(R12), R12
+	SUBL $0x02, R8
 	JZ   match_nolit_end_encodeSnappyBetterBlockAsm8B
 
 matchlen_match1_match_nolit_encodeSnappyBetterBlockAsm8B:
-	MOVB (R8)(R11*1), R10
-	CMPB (R9)(R11*1), R10
+	MOVB (R9)(R12*1), R11
+	CMPB (R10)(R12*1), R11
 	JNE  match_nolit_end_encodeSnappyBetterBlockAsm8B
-	LEAL 1(R11), R11
+	LEAL 1(R12), R12
 
 match_nolit_end_encodeSnappyBetterBlockAsm8B:
-	MOVL CX, DI
-	SUBL BX, DI
+	MOVL DX, R8
+	SUBL SI, R8
 
 	// Check if repeat
-	MOVL DI, 16(SP)
-	MOVL 12(SP), BX
-	CMPL BX, SI
+	MOVL R8, 16(SP)
+	MOVL 12(SP), SI
+	CMPL SI, DI
 	JEQ  emit_literal_done_match_emit_encodeSnappyBetterBlockAsm8B
-	MOVL SI, R8
-	MOVL SI, 12(SP)
-	LEAQ (DX)(BX*1), R9
-	SUBL BX, R8
-	LEAL -1(R8), BX
-	CMPL BX, $0x3c
+	MOVL DI, R9
+	MOVL DI, 12(SP)
+	LEAQ (BX)(SI*1), R10
+	SUBL SI, R9
+	LEAL -1(R9), SI
+	CMPL SI, $0x3c
 	JB   one_byte_match_emit_encodeSnappyBetterBlockAsm8B
-	CMPL BX, $0x00000100
+	CMPL SI, $0x00000100
 	JB   two_bytes_match_emit_encodeSnappyBetterBlockAsm8B
 	JB   three_bytes_match_emit_encodeSnappyBetterBlockAsm8B
 
 three_bytes_match_emit_encodeSnappyBetterBlockAsm8B:
-	MOVB $0xf4, (AX)
-	MOVW BX, 1(AX)
-	ADDQ $0x03, AX
+	MOVB $0xf4, (CX)
+	MOVW SI, 1(CX)
+	ADDQ $0x03, CX
 	JMP  memmove_long_match_emit_encodeSnappyBetterBlockAsm8B
 
 two_bytes_match_emit_encodeSnappyBetterBlockAsm8B:
-	MOVB $0xf0, (AX)
-	MOVB BL, 1(AX)
-	ADDQ $0x02, AX
-	CMPL BX, $0x40
+	MOVB $0xf0, (CX)
+	MOVB SI, 1(CX)
+	ADDQ $0x02, CX
+	CMPL SI, $0x40
 	JB   memmove_match_emit_encodeSnappyBetterBlockAsm8B
 	JMP  memmove_long_match_emit_encodeSnappyBetterBlockAsm8B
 
 one_byte_match_emit_encodeSnappyBetterBlockAsm8B:
-	SHLB $0x02, BL
-	MOVB BL, (AX)
-	ADDQ $0x01, AX
+	SHLB $0x02, SI
+	MOVB SI, (CX)
+	ADDQ $0x01, CX
 
 memmove_match_emit_encodeSnappyBetterBlockAsm8B:
-	LEAQ (AX)(R8*1), BX
+	LEAQ (CX)(R9*1), SI
 
 	// genMemMoveShort
-	CMPQ R8, $0x08
+	CMPQ R9, $0x08
 	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8
-	CMPQ R8, $0x10
+	CMPQ R9, $0x10
 	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8through16
-	CMPQ R8, $0x20
+	CMPQ R9, $0x20
 	JBE  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_17through32
 	JMP  emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_33through64
 
 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8:
-	MOVQ (R9), R10
-	MOVQ R10, (AX)
+	MOVQ (R10), R11
+	MOVQ R11, (CX)
 	JMP  memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B
 
 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_8through16:
-	MOVQ (R9), R10
-	MOVQ -8(R9)(R8*1), R9
-	MOVQ R10, (AX)
-	MOVQ R9, -8(AX)(R8*1)
+	MOVQ (R10), R11
+	MOVQ -8(R10)(R9*1), R10
+	MOVQ R11, (CX)
+	MOVQ R10, -8(CX)(R9*1)
 	JMP  memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B
 
 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_17through32:
-	MOVOU (R9), X0
-	MOVOU -16(R9)(R8*1), X1
-	MOVOU X0, (AX)
-	MOVOU X1, -16(AX)(R8*1)
+	MOVOU (R10), X0
+	MOVOU -16(R10)(R9*1), X1
+	MOVOU X0, (CX)
+	MOVOU X1, -16(CX)(R9*1)
 	JMP   memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B
 
 emit_lit_memmove_match_emit_encodeSnappyBetterBlockAsm8B_memmove_move_33through64:
-	MOVOU (R9), X0
-	MOVOU 16(R9), X1
-	MOVOU -32(R9)(R8*1), X2
-	MOVOU -16(R9)(R8*1), X3
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(R8*1)
-	MOVOU X3, -16(AX)(R8*1)
+	MOVOU (R10), X0
+	MOVOU 16(R10), X1
+	MOVOU -32(R10)(R9*1), X2
+	MOVOU -16(R10)(R9*1), X3
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R9*1)
+	MOVOU X3, -16(CX)(R9*1)
 
 memmove_end_copy_match_emit_encodeSnappyBetterBlockAsm8B:
-	MOVQ BX, AX
+	MOVQ SI, CX
 	JMP  emit_literal_done_match_emit_encodeSnappyBetterBlockAsm8B
 
 memmove_long_match_emit_encodeSnappyBetterBlockAsm8B:
-	LEAQ (AX)(R8*1), BX
+	LEAQ (CX)(R9*1), SI
 
 	// genMemMoveLong
-	MOVOU (R9), X0
-	MOVOU 16(R9), X1
-	MOVOU -32(R9)(R8*1), X2
-	MOVOU -16(R9)(R8*1), X3
-	MOVQ  R8, R12
-	SHRQ  $0x05, R12
-	MOVQ  AX, R10
-	ANDL  $0x0000001f, R10
-	MOVQ  $0x00000040, R13
-	SUBQ  R10, R13
-	DECQ  R12
+	MOVOU (R10), X0
+	MOVOU 16(R10), X1
+	MOVOU -32(R10)(R9*1), X2
+	MOVOU -16(R10)(R9*1), X3
+	MOVQ  R9, R13
+	SHRQ  $0x05, R13
+	MOVQ  CX, R11
+	ANDL  $0x0000001f, R11
+	MOVQ  $0x00000040, R14
+	SUBQ  R11, R14
+	DECQ  R13
 	JA    emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32
-	LEAQ  -32(R9)(R13*1), R10
-	LEAQ  -32(AX)(R13*1), R14
+	LEAQ  -32(R10)(R14*1), R11
+	LEAQ  -32(CX)(R14*1), R15
 
 emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_big_loop_back:
-	MOVOU (R10), X4
-	MOVOU 16(R10), X5
-	MOVOA X4, (R14)
-	MOVOA X5, 16(R14)
+	MOVOU (R11), X4
+	MOVOU 16(R11), X5
+	MOVOA X4, (R15)
+	MOVOA X5, 16(R15)
+	ADDQ  $0x20, R15
+	ADDQ  $0x20, R11
 	ADDQ  $0x20, R14
-	ADDQ  $0x20, R10
-	ADDQ  $0x20, R13
-	DECQ  R12
+	DECQ  R13
 	JNA   emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_big_loop_back
 
 emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32:
-	MOVOU -32(R9)(R13*1), X4
-	MOVOU -16(R9)(R13*1), X5
-	MOVOA X4, -32(AX)(R13*1)
-	MOVOA X5, -16(AX)(R13*1)
-	ADDQ  $0x20, R13
-	CMPQ  R8, R13
+	MOVOU -32(R10)(R14*1), X4
+	MOVOU -16(R10)(R14*1), X5
+	MOVOA X4, -32(CX)(R14*1)
+	MOVOA X5, -16(CX)(R14*1)
+	ADDQ  $0x20, R14
+	CMPQ  R9, R14
 	JAE   emit_lit_memmove_long_match_emit_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(R8*1)
-	MOVOU X3, -16(AX)(R8*1)
-	MOVQ  BX, AX
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(R9*1)
+	MOVOU X3, -16(CX)(R9*1)
+	MOVQ  SI, CX
 
 emit_literal_done_match_emit_encodeSnappyBetterBlockAsm8B:
-	ADDL R11, CX
-	ADDL $0x04, R11
-	MOVL CX, 12(SP)
+	ADDL R12, DX
+	ADDL $0x04, R12
+	MOVL DX, 12(SP)
 
 	// emitCopy
 two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm8B:
-	CMPL R11, $0x40
+	CMPL R12, $0x40
 	JBE  two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm8B
-	MOVB $0xee, (AX)
-	MOVW DI, 1(AX)
-	LEAL -60(R11), R11
-	ADDQ $0x03, AX
+	MOVB $0xee, (CX)
+	MOVW R8, 1(CX)
+	LEAL -60(R12), R12
+	ADDQ $0x03, CX
 	JMP  two_byte_offset_match_nolit_encodeSnappyBetterBlockAsm8B
 
 two_byte_offset_short_match_nolit_encodeSnappyBetterBlockAsm8B:
-	MOVL R11, BX
-	SHLL $0x02, BX
-	CMPL R11, $0x0c
+	MOVL R12, SI
+	SHLL $0x02, SI
+	CMPL R12, $0x0c
 	JAE  emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm8B
-	LEAL -15(BX), BX
-	MOVB DI, 1(AX)
-	SHRL $0x08, DI
-	SHLL $0x05, DI
-	ORL  DI, BX
-	MOVB BL, (AX)
-	ADDQ $0x02, AX
+	LEAL -15(SI), SI
+	MOVB R8, 1(CX)
+	SHRL $0x08, R8
+	SHLL $0x05, R8
+	ORL  R8, SI
+	MOVB SI, (CX)
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm8B
 
 emit_copy_three_match_nolit_encodeSnappyBetterBlockAsm8B:
-	LEAL -2(BX), BX
-	MOVB BL, (AX)
-	MOVW DI, 1(AX)
-	ADDQ $0x03, AX
+	LEAL -2(SI), SI
+	MOVB SI, (CX)
+	MOVW R8, 1(CX)
+	ADDQ $0x03, CX
 
 match_nolit_emitcopy_end_encodeSnappyBetterBlockAsm8B:
-	CMPL CX, 8(SP)
+	CMPL DX, 8(SP)
 	JAE  emit_remainder_encodeSnappyBetterBlockAsm8B
-	CMPQ AX, (SP)
+	CMPQ CX, (SP)
 	JB   match_nolit_dst_ok_encodeSnappyBetterBlockAsm8B
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 match_nolit_dst_ok_encodeSnappyBetterBlockAsm8B:
-	MOVQ  $0x0000cf1bbcdcbf9b, BX
-	MOVQ  $0x9e3779b1, DI
-	LEAQ  1(SI), SI
-	LEAQ  -2(CX), R8
-	MOVQ  (DX)(SI*1), R9
-	MOVQ  1(DX)(SI*1), R10
-	MOVQ  (DX)(R8*1), R11
-	MOVQ  1(DX)(R8*1), R12
-	SHLQ  $0x10, R9
-	IMULQ BX, R9
-	SHRQ  $0x36, R9
-	SHLQ  $0x20, R10
-	IMULQ DI, R10
-	SHRQ  $0x38, R10
-	SHLQ  $0x10, R11
-	IMULQ BX, R11
-	SHRQ  $0x36, R11
-	SHLQ  $0x20, R12
-	IMULQ DI, R12
-	SHRQ  $0x38, R12
-	LEAQ  1(SI), DI
-	LEAQ  1(R8), R13
-	MOVL  SI, 24(SP)(R9*4)
-	MOVL  R8, 24(SP)(R11*4)
-	MOVL  DI, 4120(SP)(R10*4)
-	MOVL  R13, 4120(SP)(R12*4)
-	LEAQ  1(R8)(SI*1), DI
-	SHRQ  $0x01, DI
-	ADDQ  $0x01, SI
-	SUBQ  $0x01, R8
+	MOVQ  $0x0000cf1bbcdcbf9b, SI
+	MOVQ  $0x9e3779b1, R8
+	LEAQ  1(DI), DI
+	LEAQ  -2(DX), R9
+	MOVQ  (BX)(DI*1), R10
+	MOVQ  1(BX)(DI*1), R11
+	MOVQ  (BX)(R9*1), R12
+	MOVQ  1(BX)(R9*1), R13
+	SHLQ  $0x10, R10
+	IMULQ SI, R10
+	SHRQ  $0x36, R10
+	SHLQ  $0x20, R11
+	IMULQ R8, R11
+	SHRQ  $0x38, R11
+	SHLQ  $0x10, R12
+	IMULQ SI, R12
+	SHRQ  $0x36, R12
+	SHLQ  $0x20, R13
+	IMULQ R8, R13
+	SHRQ  $0x38, R13
+	LEAQ  1(DI), R8
+	LEAQ  1(R9), R14
+	MOVL  DI, (AX)(R10*4)
+	MOVL  R9, (AX)(R12*4)
+	MOVL  R8, 4096(AX)(R11*4)
+	MOVL  R14, 4096(AX)(R13*4)
+	LEAQ  1(R9)(DI*1), R8
+	SHRQ  $0x01, R8
+	ADDQ  $0x01, DI
+	SUBQ  $0x01, R9
 
 index_loop_encodeSnappyBetterBlockAsm8B:
-	CMPQ  DI, R8
+	CMPQ  R8, R9
 	JAE   search_loop_encodeSnappyBetterBlockAsm8B
-	MOVQ  (DX)(SI*1), R9
-	MOVQ  (DX)(DI*1), R10
-	SHLQ  $0x10, R9
-	IMULQ BX, R9
-	SHRQ  $0x36, R9
+	MOVQ  (BX)(DI*1), R10
+	MOVQ  (BX)(R8*1), R11
 	SHLQ  $0x10, R10
-	IMULQ BX, R10
+	IMULQ SI, R10
 	SHRQ  $0x36, R10
-	MOVL  SI, 24(SP)(R9*4)
-	MOVL  DI, 24(SP)(R10*4)
-	ADDQ  $0x02, SI
+	SHLQ  $0x10, R11
+	IMULQ SI, R11
+	SHRQ  $0x36, R11
+	MOVL  DI, (AX)(R10*4)
+	MOVL  R8, (AX)(R11*4)
 	ADDQ  $0x02, DI
+	ADDQ  $0x02, R8
 	JMP   index_loop_encodeSnappyBetterBlockAsm8B
 
 emit_remainder_encodeSnappyBetterBlockAsm8B:
-	MOVQ src_len+32(FP), CX
-	SUBL 12(SP), CX
-	LEAQ 3(AX)(CX*1), CX
-	CMPQ CX, (SP)
+	MOVQ src_len+32(FP), AX
+	SUBL 12(SP), AX
+	LEAQ 3(CX)(AX*1), AX
+	CMPQ AX, (SP)
 	JB   emit_remainder_ok_encodeSnappyBetterBlockAsm8B
-	MOVQ $0x00000000, ret+48(FP)
+	MOVQ $0x00000000, ret+56(FP)
 	RET
 
 emit_remainder_ok_encodeSnappyBetterBlockAsm8B:
-	MOVQ src_len+32(FP), CX
-	MOVL 12(SP), BX
-	CMPL BX, CX
+	MOVQ src_len+32(FP), AX
+	MOVL 12(SP), DX
+	CMPL DX, AX
 	JEQ  emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm8B
-	MOVL CX, SI
-	MOVL CX, 12(SP)
-	LEAQ (DX)(BX*1), CX
-	SUBL BX, SI
+	MOVL AX, SI
+	MOVL AX, 12(SP)
+	LEAQ (BX)(DX*1), AX
+	SUBL DX, SI
 	LEAL -1(SI), DX
 	CMPL DX, $0x3c
 	JB   one_byte_emit_remainder_encodeSnappyBetterBlockAsm8B
@@ -17818,26 +17838,26 @@ emit_remainder_ok_encodeSnappyBetterBlockAsm8B:
 	JB   three_bytes_emit_remainder_encodeSnappyBetterBlockAsm8B
 
 three_bytes_emit_remainder_encodeSnappyBetterBlockAsm8B:
-	MOVB $0xf4, (AX)
-	MOVW DX, 1(AX)
-	ADDQ $0x03, AX
+	MOVB $0xf4, (CX)
+	MOVW DX, 1(CX)
+	ADDQ $0x03, CX
 	JMP  memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8B
 
 two_bytes_emit_remainder_encodeSnappyBetterBlockAsm8B:
-	MOVB $0xf0, (AX)
-	MOVB DL, 1(AX)
-	ADDQ $0x02, AX
+	MOVB $0xf0, (CX)
+	MOVB DL, 1(CX)
+	ADDQ $0x02, CX
 	CMPL DX, $0x40
 	JB   memmove_emit_remainder_encodeSnappyBetterBlockAsm8B
 	JMP  memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8B
 
 one_byte_emit_remainder_encodeSnappyBetterBlockAsm8B:
 	SHLB $0x02, DL
-	MOVB DL, (AX)
-	ADDQ $0x01, AX
+	MOVB DL, (CX)
+	ADDQ $0x01, CX
 
 memmove_emit_remainder_encodeSnappyBetterBlockAsm8B:
-	LEAQ (AX)(SI*1), DX
+	LEAQ (CX)(SI*1), DX
 	MOVL SI, BX
 
 	// genMemMoveShort
@@ -17853,73 +17873,73 @@ memmove_emit_remainder_encodeSnappyBetterBlockAsm8B:
 	JMP  emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_33through64
 
 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_1or2:
-	MOVB (CX), SI
-	MOVB -1(CX)(BX*1), CL
-	MOVB SI, (AX)
-	MOVB CL, -1(AX)(BX*1)
+	MOVB (AX), SI
+	MOVB -1(AX)(BX*1), AL
+	MOVB SI, (CX)
+	MOVB AL, -1(CX)(BX*1)
 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B
 
 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_3:
-	MOVW (CX), SI
-	MOVB 2(CX), CL
-	MOVW SI, (AX)
-	MOVB CL, 2(AX)
+	MOVW (AX), SI
+	MOVB 2(AX), AL
+	MOVW SI, (CX)
+	MOVB AL, 2(CX)
 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B
 
 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_4through7:
-	MOVL (CX), SI
-	MOVL -4(CX)(BX*1), CX
-	MOVL SI, (AX)
-	MOVL CX, -4(AX)(BX*1)
+	MOVL (AX), SI
+	MOVL -4(AX)(BX*1), AX
+	MOVL SI, (CX)
+	MOVL AX, -4(CX)(BX*1)
 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B
 
 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_8through16:
-	MOVQ (CX), SI
-	MOVQ -8(CX)(BX*1), CX
-	MOVQ SI, (AX)
-	MOVQ CX, -8(AX)(BX*1)
+	MOVQ (AX), SI
+	MOVQ -8(AX)(BX*1), AX
+	MOVQ SI, (CX)
+	MOVQ AX, -8(CX)(BX*1)
 	JMP  memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B
 
 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_17through32:
-	MOVOU (CX), X0
-	MOVOU -16(CX)(BX*1), X1
-	MOVOU X0, (AX)
-	MOVOU X1, -16(AX)(BX*1)
+	MOVOU (AX), X0
+	MOVOU -16(AX)(BX*1), X1
+	MOVOU X0, (CX)
+	MOVOU X1, -16(CX)(BX*1)
 	JMP   memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B
 
 emit_lit_memmove_emit_remainder_encodeSnappyBetterBlockAsm8B_memmove_move_33through64:
-	MOVOU (CX), X0
-	MOVOU 16(CX), X1
-	MOVOU -32(CX)(BX*1), X2
-	MOVOU -16(CX)(BX*1), X3
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(BX*1)
-	MOVOU X3, -16(AX)(BX*1)
+	MOVOU (AX), X0
+	MOVOU 16(AX), X1
+	MOVOU -32(AX)(BX*1), X2
+	MOVOU -16(AX)(BX*1), X3
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(BX*1)
+	MOVOU X3, -16(CX)(BX*1)
 
 memmove_end_copy_emit_remainder_encodeSnappyBetterBlockAsm8B:
-	MOVQ DX, AX
+	MOVQ DX, CX
 	JMP  emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm8B
 
 memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8B:
-	LEAQ (AX)(SI*1), DX
+	LEAQ (CX)(SI*1), DX
 	MOVL SI, BX
 
 	// genMemMoveLong
-	MOVOU (CX), X0
-	MOVOU 16(CX), X1
-	MOVOU -32(CX)(BX*1), X2
-	MOVOU -16(CX)(BX*1), X3
+	MOVOU (AX), X0
+	MOVOU 16(AX), X1
+	MOVOU -32(AX)(BX*1), X2
+	MOVOU -16(AX)(BX*1), X3
 	MOVQ  BX, DI
 	SHRQ  $0x05, DI
-	MOVQ  AX, SI
+	MOVQ  CX, SI
 	ANDL  $0x0000001f, SI
 	MOVQ  $0x00000040, R8
 	SUBQ  SI, R8
 	DECQ  DI
 	JA    emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32
-	LEAQ  -32(CX)(R8*1), SI
-	LEAQ  -32(AX)(R8*1), R9
+	LEAQ  -32(AX)(R8*1), SI
+	LEAQ  -32(CX)(R8*1), R9
 
 emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_big_loop_back:
 	MOVOU (SI), X4
@@ -17933,1136 +17953,1142 @@ emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_big_loop_
 	JNA   emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_big_loop_back
 
 emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32:
-	MOVOU -32(CX)(R8*1), X4
-	MOVOU -16(CX)(R8*1), X5
-	MOVOA X4, -32(AX)(R8*1)
-	MOVOA X5, -16(AX)(R8*1)
+	MOVOU -32(AX)(R8*1), X4
+	MOVOU -16(AX)(R8*1), X5
+	MOVOA X4, -32(CX)(R8*1)
+	MOVOA X5, -16(CX)(R8*1)
 	ADDQ  $0x20, R8
 	CMPQ  BX, R8
 	JAE   emit_lit_memmove_long_emit_remainder_encodeSnappyBetterBlockAsm8Blarge_forward_sse_loop_32
-	MOVOU X0, (AX)
-	MOVOU X1, 16(AX)
-	MOVOU X2, -32(AX)(BX*1)
-	MOVOU X3, -16(AX)(BX*1)
-	MOVQ  DX, AX
+	MOVOU X0, (CX)
+	MOVOU X1, 16(CX)
+	MOVOU X2, -32(CX)(BX*1)
+	MOVOU X3, -16(CX)(BX*1)
+	MOVQ  DX, CX
 
 emit_literal_done_emit_remainder_encodeSnappyBetterBlockAsm8B:
-	MOVQ dst_base+0(FP), CX
-	SUBQ CX, AX
-	MOVQ AX, ret+48(FP)
+	MOVQ dst_base+0(FP), AX
+	SUBQ AX, CX
+	MOVQ CX, ret+56(FP)
 	RET
 
-// func calcBlockSize(src []byte) int
+// func calcBlockSize(src []byte, tmp *[32768]byte) int
 // Requires: BMI, SSE2
-TEXT ·calcBlockSize(SB), $32792-32
-	XORQ AX, AX
-	MOVQ $0x00000100, CX
-	LEAQ 24(SP), DX
+TEXT ·calcBlockSize(SB), $24-40
+	MOVQ tmp+24(FP), AX
+	XORQ CX, CX
+	MOVQ $0x00000100, DX
+	MOVQ AX, BX
 	PXOR X0, X0
 
 zero_loop_calcBlockSize:
-	MOVOU X0, (DX)
-	MOVOU X0, 16(DX)
-	MOVOU X0, 32(DX)
-	MOVOU X0, 48(DX)
-	MOVOU X0, 64(DX)
-	MOVOU X0, 80(DX)
-	MOVOU X0, 96(DX)
-	MOVOU X0, 112(DX)
-	ADDQ  $0x80, DX
-	DECQ  CX
+	MOVOU X0, (BX)
+	MOVOU X0, 16(BX)
+	MOVOU X0, 32(BX)
+	MOVOU X0, 48(BX)
+	MOVOU X0, 64(BX)
+	MOVOU X0, 80(BX)
+	MOVOU X0, 96(BX)
+	MOVOU X0, 112(BX)
+	ADDQ  $0x80, BX
+	DECQ  DX
 	JNZ   zero_loop_calcBlockSize
 	MOVL  $0x00000000, 12(SP)
-	MOVQ  src_len+8(FP), CX
-	LEAQ  -9(CX), DX
-	LEAQ  -8(CX), BX
-	MOVL  BX, 8(SP)
-	SHRQ  $0x05, CX
-	SUBL  CX, DX
-	LEAQ  (AX)(DX*1), DX
-	MOVQ  DX, (SP)
-	MOVL  $0x00000001, CX
-	MOVL  CX, 16(SP)
-	MOVQ  src_base+0(FP), DX
+	MOVQ  src_len+8(FP), DX
+	LEAQ  -9(DX), BX
+	LEAQ  -8(DX), SI
+	MOVL  SI, 8(SP)
+	SHRQ  $0x05, DX
+	SUBL  DX, BX
+	LEAQ  (CX)(BX*1), BX
+	MOVQ  BX, (SP)
+	MOVL  $0x00000001, DX
+	MOVL  DX, 16(SP)
+	MOVQ  src_base+0(FP), BX
 
 search_loop_calcBlockSize:
-	MOVL  CX, BX
-	SUBL  12(SP), BX
-	SHRL  $0x05, BX
-	LEAL  4(CX)(BX*1), BX
-	CMPL  BX, 8(SP)
+	MOVL  DX, SI
+	SUBL  12(SP), SI
+	SHRL  $0x05, SI
+	LEAL  4(DX)(SI*1), SI
+	CMPL  SI, 8(SP)
 	JAE   emit_remainder_calcBlockSize
-	MOVQ  (DX)(CX*1), SI
-	MOVL  BX, 20(SP)
-	MOVQ  $0x0000cf1bbcdcbf9b, R8
-	MOVQ  SI, R9
-	MOVQ  SI, R10
-	SHRQ  $0x08, R10
-	SHLQ  $0x10, R9
-	IMULQ R8, R9
-	SHRQ  $0x33, R9
+	MOVQ  (BX)(DX*1), DI
+	MOVL  SI, 20(SP)
+	MOVQ  $0x0000cf1bbcdcbf9b, R9
+	MOVQ  DI, R10
+	MOVQ  DI, R11
+	SHRQ  $0x08, R11
 	SHLQ  $0x10, R10
-	IMULQ R8, R10
+	IMULQ R9, R10
 	SHRQ  $0x33, R10
-	MOVL  24(SP)(R9*4), BX
-	MOVL  24(SP)(R10*4), DI
-	MOVL  CX, 24(SP)(R9*4)
-	LEAL  1(CX), R9
-	MOVL  R9, 24(SP)(R10*4)
-	MOVQ  SI, R9
-	SHRQ  $0x10, R9
-	SHLQ  $0x10, R9
-	IMULQ R8, R9
-	SHRQ  $0x33, R9
-	MOVL  CX, R8
-	SUBL  16(SP), R8
-	MOVL  1(DX)(R8*1), R10
-	MOVQ  SI, R8
-	SHRQ  $0x08, R8
-	CMPL  R8, R10
+	SHLQ  $0x10, R11
+	IMULQ R9, R11
+	SHRQ  $0x33, R11
+	MOVL  (AX)(R10*4), SI
+	MOVL  (AX)(R11*4), R8
+	MOVL  DX, (AX)(R10*4)
+	LEAL  1(DX), R10
+	MOVL  R10, (AX)(R11*4)
+	MOVQ  DI, R10
+	SHRQ  $0x10, R10
+	SHLQ  $0x10, R10
+	IMULQ R9, R10
+	SHRQ  $0x33, R10
+	MOVL  DX, R9
+	SUBL  16(SP), R9
+	MOVL  1(BX)(R9*1), R11
+	MOVQ  DI, R9
+	SHRQ  $0x08, R9
+	CMPL  R9, R11
 	JNE   no_repeat_found_calcBlockSize
-	LEAL  1(CX), SI
-	MOVL  12(SP), BX
-	MOVL  SI, DI
-	SUBL  16(SP), DI
+	LEAL  1(DX), DI
+	MOVL  12(SP), SI
+	MOVL  DI, R8
+	SUBL  16(SP), R8
 	JZ    repeat_extend_back_end_calcBlockSize
 
 repeat_extend_back_loop_calcBlockSize:
-	CMPL SI, BX
+	CMPL DI, SI
 	JBE  repeat_extend_back_end_calcBlockSize
-	MOVB -1(DX)(DI*1), R8
-	MOVB -1(DX)(SI*1), R9
-	CMPB R8, R9
+	MOVB -1(BX)(R8*1), R9
+	MOVB -1(BX)(DI*1), R10
+	CMPB R9, R10
 	JNE  repeat_extend_back_end_calcBlockSize
-	LEAL -1(SI), SI
-	DECL DI
+	LEAL -1(DI), DI
+	DECL R8
 	JNZ  repeat_extend_back_loop_calcBlockSize
 
 repeat_extend_back_end_calcBlockSize:
-	MOVL SI, BX
-	SUBL 12(SP), BX
-	LEAQ 5(AX)(BX*1), BX
-	CMPQ BX, (SP)
+	MOVL DI, SI
+	SUBL 12(SP), SI
+	LEAQ 5(CX)(SI*1), SI
+	CMPQ SI, (SP)
 	JB   repeat_dst_size_check_calcBlockSize
-	MOVQ $0x00000000, ret+24(FP)
+	MOVQ $0x00000000, ret+32(FP)
 	RET
 
 repeat_dst_size_check_calcBlockSize:
-	MOVL 12(SP), BX
-	CMPL BX, SI
+	MOVL 12(SP), SI
+	CMPL SI, DI
 	JEQ  emit_literal_done_repeat_emit_calcBlockSize
-	MOVL SI, DI
-	MOVL SI, 12(SP)
-	LEAQ (DX)(BX*1), R8
-	SUBL BX, DI
-	LEAL -1(DI), BX
-	CMPL BX, $0x3c
+	MOVL DI, R8
+	MOVL DI, 12(SP)
+	LEAQ (BX)(SI*1), R9
+	SUBL SI, R8
+	LEAL -1(R8), SI
+	CMPL SI, $0x3c
 	JB   one_byte_repeat_emit_calcBlockSize
-	CMPL BX, $0x00000100
+	CMPL SI, $0x00000100
 	JB   two_bytes_repeat_emit_calcBlockSize
-	CMPL BX, $0x00010000
+	CMPL SI, $0x00010000
 	JB   three_bytes_repeat_emit_calcBlockSize
-	CMPL BX, $0x01000000
+	CMPL SI, $0x01000000
 	JB   four_bytes_repeat_emit_calcBlockSize
-	ADDQ $0x05, AX
+	ADDQ $0x05, CX
 	JMP  memmove_long_repeat_emit_calcBlockSize
 
 four_bytes_repeat_emit_calcBlockSize:
-	ADDQ $0x04, AX
+	ADDQ $0x04, CX
 	JMP  memmove_long_repeat_emit_calcBlockSize
 
 three_bytes_repeat_emit_calcBlockSize:
-	ADDQ $0x03, AX
+	ADDQ $0x03, CX
 	JMP  memmove_long_repeat_emit_calcBlockSize
 
 two_bytes_repeat_emit_calcBlockSize:
-	ADDQ $0x02, AX
-	CMPL BX, $0x40
+	ADDQ $0x02, CX
+	CMPL SI, $0x40
 	JB   memmove_repeat_emit_calcBlockSize
 	JMP  memmove_long_repeat_emit_calcBlockSize
 
 one_byte_repeat_emit_calcBlockSize:
-	ADDQ $0x01, AX
+	ADDQ $0x01, CX
 
 memmove_repeat_emit_calcBlockSize:
-	LEAQ (AX)(DI*1), AX
+	LEAQ (CX)(R8*1), CX
 	JMP  emit_literal_done_repeat_emit_calcBlockSize
 
 memmove_long_repeat_emit_calcBlockSize:
-	LEAQ (AX)(DI*1), AX
+	LEAQ (CX)(R8*1), CX
 
 emit_literal_done_repeat_emit_calcBlockSize:
-	ADDL $0x05, CX
-	MOVL CX, BX
-	SUBL 16(SP), BX
-	MOVQ src_len+8(FP), DI
-	SUBL CX, DI
-	LEAQ (DX)(CX*1), R8
-	LEAQ (DX)(BX*1), BX
+	ADDL $0x05, DX
+	MOVL DX, SI
+	SUBL 16(SP), SI
+	MOVQ src_len+8(FP), R8
+	SUBL DX, R8
+	LEAQ (BX)(DX*1), R9
+	LEAQ (BX)(SI*1), SI
 
 	// matchLen
-	XORL R10, R10
+	XORL R11, R11
 
 matchlen_loopback_16_repeat_extend_calcBlockSize:
-	CMPL DI, $0x10
+	CMPL R8, $0x10
 	JB   matchlen_match8_repeat_extend_calcBlockSize
-	MOVQ (R8)(R10*1), R9
-	MOVQ 8(R8)(R10*1), R11
-	XORQ (BX)(R10*1), R9
+	MOVQ (R9)(R11*1), R10
+	MOVQ 8(R9)(R11*1), R12
+	XORQ (SI)(R11*1), R10
 	JNZ  matchlen_bsf_8_repeat_extend_calcBlockSize
-	XORQ 8(BX)(R10*1), R11
+	XORQ 8(SI)(R11*1), R12
 	JNZ  matchlen_bsf_16repeat_extend_calcBlockSize
-	LEAL -16(DI), DI
-	LEAL 16(R10), R10
+	LEAL -16(R8), R8
+	LEAL 16(R11), R11
 	JMP  matchlen_loopback_16_repeat_extend_calcBlockSize
 
 matchlen_bsf_16repeat_extend_calcBlockSize:
 #ifdef GOAMD64_v3
-	TZCNTQ R11, R11
+	TZCNTQ R12, R12
 
 #else
-	BSFQ R11, R11
+	BSFQ R12, R12
 
 #endif
-	SARQ $0x03, R11
-	LEAL 8(R10)(R11*1), R10
+	SARQ $0x03, R12
+	LEAL 8(R11)(R12*1), R11
 	JMP  repeat_extend_forward_end_calcBlockSize
 
 matchlen_match8_repeat_extend_calcBlockSize:
-	CMPL DI, $0x08
+	CMPL R8, $0x08
 	JB   matchlen_match4_repeat_extend_calcBlockSize
-	MOVQ (R8)(R10*1), R9
-	XORQ (BX)(R10*1), R9
+	MOVQ (R9)(R11*1), R10
+	XORQ (SI)(R11*1), R10
 	JNZ  matchlen_bsf_8_repeat_extend_calcBlockSize
-	LEAL -8(DI), DI
-	LEAL 8(R10), R10
+	LEAL -8(R8), R8
+	LEAL 8(R11), R11
 	JMP  matchlen_match4_repeat_extend_calcBlockSize
 
 matchlen_bsf_8_repeat_extend_calcBlockSize:
 #ifdef GOAMD64_v3
-	TZCNTQ R9, R9
+	TZCNTQ R10, R10
 
 #else
-	BSFQ R9, R9
+	BSFQ R10, R10
 
 #endif
-	SARQ $0x03, R9
-	LEAL (R10)(R9*1), R10
+	SARQ $0x03, R10
+	LEAL (R11)(R10*1), R11
 	JMP  repeat_extend_forward_end_calcBlockSize
 
 matchlen_match4_repeat_extend_calcBlockSize:
-	CMPL DI, $0x04
+	CMPL R8, $0x04
 	JB   matchlen_match2_repeat_extend_calcBlockSize
-	MOVL (R8)(R10*1), R9
-	CMPL (BX)(R10*1), R9
+	MOVL (R9)(R11*1), R10
+	CMPL (SI)(R11*1), R10
 	JNE  matchlen_match2_repeat_extend_calcBlockSize
-	LEAL -4(DI), DI
-	LEAL 4(R10), R10
+	LEAL -4(R8), R8
+	LEAL 4(R11), R11
 
 matchlen_match2_repeat_extend_calcBlockSize:
-	CMPL DI, $0x01
+	CMPL R8, $0x01
 	JE   matchlen_match1_repeat_extend_calcBlockSize
 	JB   repeat_extend_forward_end_calcBlockSize
-	MOVW (R8)(R10*1), R9
-	CMPW (BX)(R10*1), R9
+	MOVW (R9)(R11*1), R10
+	CMPW (SI)(R11*1), R10
 	JNE  matchlen_match1_repeat_extend_calcBlockSize
-	LEAL 2(R10), R10
-	SUBL $0x02, DI
+	LEAL 2(R11), R11
+	SUBL $0x02, R8
 	JZ   repeat_extend_forward_end_calcBlockSize
 
 matchlen_match1_repeat_extend_calcBlockSize:
-	MOVB (R8)(R10*1), R9
-	CMPB (BX)(R10*1), R9
+	MOVB (R9)(R11*1), R10
+	CMPB (SI)(R11*1), R10
 	JNE  repeat_extend_forward_end_calcBlockSize
-	LEAL 1(R10), R10
+	LEAL 1(R11), R11
 
 repeat_extend_forward_end_calcBlockSize:
-	ADDL R10, CX
-	MOVL CX, BX
-	SUBL SI, BX
-	MOVL 16(SP), SI
+	ADDL R11, DX
+	MOVL DX, SI
+	SUBL DI, SI
+	MOVL 16(SP), DI
 
 	// emitCopy
-	CMPL SI, $0x00010000
+	CMPL DI, $0x00010000
 	JB   two_byte_offset_repeat_as_copy_calcBlockSize
 
 four_bytes_loop_back_repeat_as_copy_calcBlockSize:
-	CMPL BX, $0x40
+	CMPL SI, $0x40
 	JBE  four_bytes_remain_repeat_as_copy_calcBlockSize
-	LEAL -64(BX), BX
-	ADDQ $0x05, AX
-	CMPL BX, $0x04
+	LEAL -64(SI), SI
+	ADDQ $0x05, CX
+	CMPL SI, $0x04
 	JB   four_bytes_remain_repeat_as_copy_calcBlockSize
 	JMP  four_bytes_loop_back_repeat_as_copy_calcBlockSize
 
 four_bytes_remain_repeat_as_copy_calcBlockSize:
-	TESTL BX, BX
+	TESTL SI, SI
 	JZ    repeat_end_emit_calcBlockSize
-	XORL  BX, BX
-	ADDQ  $0x05, AX
+	XORL  SI, SI
+	ADDQ  $0x05, CX
 	JMP   repeat_end_emit_calcBlockSize
 
 two_byte_offset_repeat_as_copy_calcBlockSize:
-	CMPL BX, $0x40
+	CMPL SI, $0x40
 	JBE  two_byte_offset_short_repeat_as_copy_calcBlockSize
-	LEAL -60(BX), BX
-	ADDQ $0x03, AX
+	LEAL -60(SI), SI
+	ADDQ $0x03, CX
 	JMP  two_byte_offset_repeat_as_copy_calcBlockSize
 
 two_byte_offset_short_repeat_as_copy_calcBlockSize:
-	MOVL BX, DI
-	SHLL $0x02, DI
-	CMPL BX, $0x0c
+	MOVL SI, R8
+	SHLL $0x02, R8
+	CMPL SI, $0x0c
 	JAE  emit_copy_three_repeat_as_copy_calcBlockSize
-	CMPL SI, $0x00000800
+	CMPL DI, $0x00000800
 	JAE  emit_copy_three_repeat_as_copy_calcBlockSize
-	ADDQ $0x02, AX
+	ADDQ $0x02, CX
 	JMP  repeat_end_emit_calcBlockSize
 
 emit_copy_three_repeat_as_copy_calcBlockSize:
-	ADDQ $0x03, AX
+	ADDQ $0x03, CX
 
 repeat_end_emit_calcBlockSize:
-	MOVL CX, 12(SP)
+	MOVL DX, 12(SP)
 	JMP  search_loop_calcBlockSize
 
 no_repeat_found_calcBlockSize:
-	CMPL (DX)(BX*1), SI
+	CMPL (BX)(SI*1), DI
 	JEQ  candidate_match_calcBlockSize
-	SHRQ $0x08, SI
-	MOVL 24(SP)(R9*4), BX
-	LEAL 2(CX), R8
-	CMPL (DX)(DI*1), SI
+	SHRQ $0x08, DI
+	MOVL (AX)(R10*4), SI
+	LEAL 2(DX), R9
+	CMPL (BX)(R8*1), DI
 	JEQ  candidate2_match_calcBlockSize
-	MOVL R8, 24(SP)(R9*4)
-	SHRQ $0x08, SI
-	CMPL (DX)(BX*1), SI
+	MOVL R9, (AX)(R10*4)
+	SHRQ $0x08, DI
+	CMPL (BX)(SI*1), DI
 	JEQ  candidate3_match_calcBlockSize
-	MOVL 20(SP), CX
+	MOVL 20(SP), DX
 	JMP  search_loop_calcBlockSize
 
 candidate3_match_calcBlockSize:
-	ADDL $0x02, CX
+	ADDL $0x02, DX
 	JMP  candidate_match_calcBlockSize
 
 candidate2_match_calcBlockSize:
-	MOVL R8, 24(SP)(R9*4)
-	INCL CX
-	MOVL DI, BX
+	MOVL R9, (AX)(R10*4)
+	INCL DX
+	MOVL R8, SI
 
 candidate_match_calcBlockSize:
-	MOVL  12(SP), SI
-	TESTL BX, BX
+	MOVL  12(SP), DI
+	TESTL SI, SI
 	JZ    match_extend_back_end_calcBlockSize
 
 match_extend_back_loop_calcBlockSize:
-	CMPL CX, SI
+	CMPL DX, DI
 	JBE  match_extend_back_end_calcBlockSize
-	MOVB -1(DX)(BX*1), DI
-	MOVB -1(DX)(CX*1), R8
-	CMPB DI, R8
+	MOVB -1(BX)(SI*1), R8
+	MOVB -1(BX)(DX*1), R9
+	CMPB R8, R9
 	JNE  match_extend_back_end_calcBlockSize
-	LEAL -1(CX), CX
-	DECL BX
+	LEAL -1(DX), DX
+	DECL SI
 	JZ   match_extend_back_end_calcBlockSize
 	JMP  match_extend_back_loop_calcBlockSize
 
 match_extend_back_end_calcBlockSize:
-	MOVL CX, SI
-	SUBL 12(SP), SI
-	LEAQ 5(AX)(SI*1), SI
-	CMPQ SI, (SP)
+	MOVL DX, DI
+	SUBL 12(SP), DI
+	LEAQ 5(CX)(DI*1), DI
+	CMPQ DI, (SP)
 	JB   match_dst_size_check_calcBlockSize
-	MOVQ $0x00000000, ret+24(FP)
+	MOVQ $0x00000000, ret+32(FP)
 	RET
 
 match_dst_size_check_calcBlockSize:
-	MOVL CX, SI
-	MOVL 12(SP), DI
-	CMPL DI, SI
+	MOVL DX, DI
+	MOVL 12(SP), R8
+	CMPL R8, DI
 	JEQ  emit_literal_done_match_emit_calcBlockSize
-	MOVL SI, R8
-	MOVL SI, 12(SP)
-	LEAQ (DX)(DI*1), SI
-	SUBL DI, R8
-	LEAL -1(R8), SI
-	CMPL SI, $0x3c
+	MOVL DI, R9
+	MOVL DI, 12(SP)
+	LEAQ (BX)(R8*1), DI
+	SUBL R8, R9
+	LEAL -1(R9), DI
+	CMPL DI, $0x3c
 	JB   one_byte_match_emit_calcBlockSize
-	CMPL SI, $0x00000100
+	CMPL DI, $0x00000100
 	JB   two_bytes_match_emit_calcBlockSize
-	CMPL SI, $0x00010000
+	CMPL DI, $0x00010000
 	JB   three_bytes_match_emit_calcBlockSize
-	CMPL SI, $0x01000000
+	CMPL DI, $0x01000000
 	JB   four_bytes_match_emit_calcBlockSize
-	ADDQ $0x05, AX
+	ADDQ $0x05, CX
 	JMP  memmove_long_match_emit_calcBlockSize
 
 four_bytes_match_emit_calcBlockSize:
-	ADDQ $0x04, AX
+	ADDQ $0x04, CX
 	JMP  memmove_long_match_emit_calcBlockSize
 
 three_bytes_match_emit_calcBlockSize:
-	ADDQ $0x03, AX
+	ADDQ $0x03, CX
 	JMP  memmove_long_match_emit_calcBlockSize
 
 two_bytes_match_emit_calcBlockSize:
-	ADDQ $0x02, AX
-	CMPL SI, $0x40
+	ADDQ $0x02, CX
+	CMPL DI, $0x40
 	JB   memmove_match_emit_calcBlockSize
 	JMP  memmove_long_match_emit_calcBlockSize
 
 one_byte_match_emit_calcBlockSize:
-	ADDQ $0x01, AX
+	ADDQ $0x01, CX
 
 memmove_match_emit_calcBlockSize:
-	LEAQ (AX)(R8*1), AX
+	LEAQ (CX)(R9*1), CX
 	JMP  emit_literal_done_match_emit_calcBlockSize
 
 memmove_long_match_emit_calcBlockSize:
-	LEAQ (AX)(R8*1), AX
+	LEAQ (CX)(R9*1), CX
 
 emit_literal_done_match_emit_calcBlockSize:
 match_nolit_loop_calcBlockSize:
-	MOVL CX, SI
-	SUBL BX, SI
-	MOVL SI, 16(SP)
-	ADDL $0x04, CX
-	ADDL $0x04, BX
-	MOVQ src_len+8(FP), SI
-	SUBL CX, SI
-	LEAQ (DX)(CX*1), DI
-	LEAQ (DX)(BX*1), BX
+	MOVL DX, DI
+	SUBL SI, DI
+	MOVL DI, 16(SP)
+	ADDL $0x04, DX
+	ADDL $0x04, SI
+	MOVQ src_len+8(FP), DI
+	SUBL DX, DI
+	LEAQ (BX)(DX*1), R8
+	LEAQ (BX)(SI*1), SI
 
 	// matchLen
-	XORL R9, R9
+	XORL R10, R10
 
 matchlen_loopback_16_match_nolit_calcBlockSize:
-	CMPL SI, $0x10
+	CMPL DI, $0x10
 	JB   matchlen_match8_match_nolit_calcBlockSize
-	MOVQ (DI)(R9*1), R8
-	MOVQ 8(DI)(R9*1), R10
-	XORQ (BX)(R9*1), R8
+	MOVQ (R8)(R10*1), R9
+	MOVQ 8(R8)(R10*1), R11
+	XORQ (SI)(R10*1), R9
 	JNZ  matchlen_bsf_8_match_nolit_calcBlockSize
-	XORQ 8(BX)(R9*1), R10
+	XORQ 8(SI)(R10*1), R11
 	JNZ  matchlen_bsf_16match_nolit_calcBlockSize
-	LEAL -16(SI), SI
-	LEAL 16(R9), R9
+	LEAL -16(DI), DI
+	LEAL 16(R10), R10
 	JMP  matchlen_loopback_16_match_nolit_calcBlockSize
 
 matchlen_bsf_16match_nolit_calcBlockSize:
 #ifdef GOAMD64_v3
-	TZCNTQ R10, R10
+	TZCNTQ R11, R11
 
 #else
-	BSFQ R10, R10
+	BSFQ R11, R11
 
 #endif
-	SARQ $0x03, R10
-	LEAL 8(R9)(R10*1), R9
+	SARQ $0x03, R11
+	LEAL 8(R10)(R11*1), R10
 	JMP  match_nolit_end_calcBlockSize
 
 matchlen_match8_match_nolit_calcBlockSize:
-	CMPL SI, $0x08
+	CMPL DI, $0x08
 	JB   matchlen_match4_match_nolit_calcBlockSize
-	MOVQ (DI)(R9*1), R8
-	XORQ (BX)(R9*1), R8
+	MOVQ (R8)(R10*1), R9
+	XORQ (SI)(R10*1), R9
 	JNZ  matchlen_bsf_8_match_nolit_calcBlockSize
-	LEAL -8(SI), SI
-	LEAL 8(R9), R9
+	LEAL -8(DI), DI
+	LEAL 8(R10), R10
 	JMP  matchlen_match4_match_nolit_calcBlockSize
 
 matchlen_bsf_8_match_nolit_calcBlockSize:
 #ifdef GOAMD64_v3
-	TZCNTQ R8, R8
+	TZCNTQ R9, R9
 
 #else
-	BSFQ R8, R8
+	BSFQ R9, R9
 
 #endif
-	SARQ $0x03, R8
-	LEAL (R9)(R8*1), R9
+	SARQ $0x03, R9
+	LEAL (R10)(R9*1), R10
 	JMP  match_nolit_end_calcBlockSize
 
 matchlen_match4_match_nolit_calcBlockSize:
-	CMPL SI, $0x04
+	CMPL DI, $0x04
 	JB   matchlen_match2_match_nolit_calcBlockSize
-	MOVL (DI)(R9*1), R8
-	CMPL (BX)(R9*1), R8
+	MOVL (R8)(R10*1), R9
+	CMPL (SI)(R10*1), R9
 	JNE  matchlen_match2_match_nolit_calcBlockSize
-	LEAL -4(SI), SI
-	LEAL 4(R9), R9
+	LEAL -4(DI), DI
+	LEAL 4(R10), R10
 
 matchlen_match2_match_nolit_calcBlockSize:
-	CMPL SI, $0x01
+	CMPL DI, $0x01
 	JE   matchlen_match1_match_nolit_calcBlockSize
 	JB   match_nolit_end_calcBlockSize
-	MOVW (DI)(R9*1), R8
-	CMPW (BX)(R9*1), R8
+	MOVW (R8)(R10*1), R9
+	CMPW (SI)(R10*1), R9
 	JNE  matchlen_match1_match_nolit_calcBlockSize
-	LEAL 2(R9), R9
-	SUBL $0x02, SI
+	LEAL 2(R10), R10
+	SUBL $0x02, DI
 	JZ   match_nolit_end_calcBlockSize
 
 matchlen_match1_match_nolit_calcBlockSize:
-	MOVB (DI)(R9*1), R8
-	CMPB (BX)(R9*1), R8
+	MOVB (R8)(R10*1), R9
+	CMPB (SI)(R10*1), R9
 	JNE  match_nolit_end_calcBlockSize
-	LEAL 1(R9), R9
+	LEAL 1(R10), R10
 
 match_nolit_end_calcBlockSize:
-	ADDL R9, CX
-	MOVL 16(SP), BX
-	ADDL $0x04, R9
-	MOVL CX, 12(SP)
+	ADDL R10, DX
+	MOVL 16(SP), SI
+	ADDL $0x04, R10
+	MOVL DX, 12(SP)
 
 	// emitCopy
-	CMPL BX, $0x00010000
+	CMPL SI, $0x00010000
 	JB   two_byte_offset_match_nolit_calcBlockSize
 
 four_bytes_loop_back_match_nolit_calcBlockSize:
-	CMPL R9, $0x40
+	CMPL R10, $0x40
 	JBE  four_bytes_remain_match_nolit_calcBlockSize
-	LEAL -64(R9), R9
-	ADDQ $0x05, AX
-	CMPL R9, $0x04
+	LEAL -64(R10), R10
+	ADDQ $0x05, CX
+	CMPL R10, $0x04
 	JB   four_bytes_remain_match_nolit_calcBlockSize
 	JMP  four_bytes_loop_back_match_nolit_calcBlockSize
 
 four_bytes_remain_match_nolit_calcBlockSize:
-	TESTL R9, R9
+	TESTL R10, R10
 	JZ    match_nolit_emitcopy_end_calcBlockSize
-	XORL  BX, BX
-	ADDQ  $0x05, AX
+	XORL  SI, SI
+	ADDQ  $0x05, CX
 	JMP   match_nolit_emitcopy_end_calcBlockSize
 
 two_byte_offset_match_nolit_calcBlockSize:
-	CMPL R9, $0x40
+	CMPL R10, $0x40
 	JBE  two_byte_offset_short_match_nolit_calcBlockSize
-	LEAL -60(R9), R9
-	ADDQ $0x03, AX
+	LEAL -60(R10), R10
+	ADDQ $0x03, CX
 	JMP  two_byte_offset_match_nolit_calcBlockSize
 
 two_byte_offset_short_match_nolit_calcBlockSize:
-	MOVL R9, SI
-	SHLL $0x02, SI
-	CMPL R9, $0x0c
+	MOVL R10, DI
+	SHLL $0x02, DI
+	CMPL R10, $0x0c
 	JAE  emit_copy_three_match_nolit_calcBlockSize
-	CMPL BX, $0x00000800
+	CMPL SI, $0x00000800
 	JAE  emit_copy_three_match_nolit_calcBlockSize
-	ADDQ $0x02, AX
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_calcBlockSize
 
 emit_copy_three_match_nolit_calcBlockSize:
-	ADDQ $0x03, AX
+	ADDQ $0x03, CX
 
 match_nolit_emitcopy_end_calcBlockSize:
-	CMPL CX, 8(SP)
+	CMPL DX, 8(SP)
 	JAE  emit_remainder_calcBlockSize
-	MOVQ -2(DX)(CX*1), SI
-	CMPQ AX, (SP)
+	MOVQ -2(BX)(DX*1), DI
+	CMPQ CX, (SP)
 	JB   match_nolit_dst_ok_calcBlockSize
-	MOVQ $0x00000000, ret+24(FP)
+	MOVQ $0x00000000, ret+32(FP)
 	RET
 
 match_nolit_dst_ok_calcBlockSize:
-	MOVQ  $0x0000cf1bbcdcbf9b, R8
-	MOVQ  SI, DI
-	SHRQ  $0x10, SI
-	MOVQ  SI, BX
-	SHLQ  $0x10, DI
-	IMULQ R8, DI
-	SHRQ  $0x33, DI
-	SHLQ  $0x10, BX
-	IMULQ R8, BX
-	SHRQ  $0x33, BX
-	LEAL  -2(CX), R8
-	LEAQ  24(SP)(BX*4), R9
-	MOVL  (R9), BX
-	MOVL  R8, 24(SP)(DI*4)
-	MOVL  CX, (R9)
-	CMPL  (DX)(BX*1), SI
+	MOVQ  $0x0000cf1bbcdcbf9b, R9
+	MOVQ  DI, R8
+	SHRQ  $0x10, DI
+	MOVQ  DI, SI
+	SHLQ  $0x10, R8
+	IMULQ R9, R8
+	SHRQ  $0x33, R8
+	SHLQ  $0x10, SI
+	IMULQ R9, SI
+	SHRQ  $0x33, SI
+	LEAL  -2(DX), R9
+	LEAQ  (AX)(SI*4), R10
+	MOVL  (R10), SI
+	MOVL  R9, (AX)(R8*4)
+	MOVL  DX, (R10)
+	CMPL  (BX)(SI*1), DI
 	JEQ   match_nolit_loop_calcBlockSize
-	INCL  CX
+	INCL  DX
 	JMP   search_loop_calcBlockSize
 
 emit_remainder_calcBlockSize:
-	MOVQ src_len+8(FP), CX
-	SUBL 12(SP), CX
-	LEAQ 5(AX)(CX*1), CX
-	CMPQ CX, (SP)
+	MOVQ src_len+8(FP), AX
+	SUBL 12(SP), AX
+	LEAQ 5(CX)(AX*1), AX
+	CMPQ AX, (SP)
 	JB   emit_remainder_ok_calcBlockSize
-	MOVQ $0x00000000, ret+24(FP)
+	MOVQ $0x00000000, ret+32(FP)
 	RET
 
 emit_remainder_ok_calcBlockSize:
-	MOVQ src_len+8(FP), CX
-	MOVL 12(SP), BX
-	CMPL BX, CX
+	MOVQ src_len+8(FP), AX
+	MOVL 12(SP), DX
+	CMPL DX, AX
 	JEQ  emit_literal_done_emit_remainder_calcBlockSize
-	MOVL CX, SI
-	MOVL CX, 12(SP)
-	LEAQ (DX)(BX*1), CX
-	SUBL BX, SI
-	LEAL -1(SI), CX
-	CMPL CX, $0x3c
+	MOVL AX, SI
+	MOVL AX, 12(SP)
+	LEAQ (BX)(DX*1), AX
+	SUBL DX, SI
+	LEAL -1(SI), AX
+	CMPL AX, $0x3c
 	JB   one_byte_emit_remainder_calcBlockSize
-	CMPL CX, $0x00000100
+	CMPL AX, $0x00000100
 	JB   two_bytes_emit_remainder_calcBlockSize
-	CMPL CX, $0x00010000
+	CMPL AX, $0x00010000
 	JB   three_bytes_emit_remainder_calcBlockSize
-	CMPL CX, $0x01000000
+	CMPL AX, $0x01000000
 	JB   four_bytes_emit_remainder_calcBlockSize
-	ADDQ $0x05, AX
+	ADDQ $0x05, CX
 	JMP  memmove_long_emit_remainder_calcBlockSize
 
 four_bytes_emit_remainder_calcBlockSize:
-	ADDQ $0x04, AX
+	ADDQ $0x04, CX
 	JMP  memmove_long_emit_remainder_calcBlockSize
 
 three_bytes_emit_remainder_calcBlockSize:
-	ADDQ $0x03, AX
+	ADDQ $0x03, CX
 	JMP  memmove_long_emit_remainder_calcBlockSize
 
 two_bytes_emit_remainder_calcBlockSize:
-	ADDQ $0x02, AX
-	CMPL CX, $0x40
+	ADDQ $0x02, CX
+	CMPL AX, $0x40
 	JB   memmove_emit_remainder_calcBlockSize
 	JMP  memmove_long_emit_remainder_calcBlockSize
 
 one_byte_emit_remainder_calcBlockSize:
-	ADDQ $0x01, AX
+	ADDQ $0x01, CX
 
 memmove_emit_remainder_calcBlockSize:
-	LEAQ (AX)(SI*1), AX
+	LEAQ (CX)(SI*1), AX
+	MOVQ AX, CX
 	JMP  emit_literal_done_emit_remainder_calcBlockSize
 
 memmove_long_emit_remainder_calcBlockSize:
-	LEAQ (AX)(SI*1), AX
+	LEAQ (CX)(SI*1), AX
+	MOVQ AX, CX
 
 emit_literal_done_emit_remainder_calcBlockSize:
-	MOVQ AX, ret+24(FP)
+	MOVQ CX, ret+32(FP)
 	RET
 
-// func calcBlockSizeSmall(src []byte) int
+// func calcBlockSizeSmall(src []byte, tmp *[2048]byte) int
 // Requires: BMI, SSE2
-TEXT ·calcBlockSizeSmall(SB), $2072-32
-	XORQ AX, AX
-	MOVQ $0x00000010, CX
-	LEAQ 24(SP), DX
+TEXT ·calcBlockSizeSmall(SB), $24-40
+	MOVQ tmp+24(FP), AX
+	XORQ CX, CX
+	MOVQ $0x00000010, DX
+	MOVQ AX, BX
 	PXOR X0, X0
 
 zero_loop_calcBlockSizeSmall:
-	MOVOU X0, (DX)
-	MOVOU X0, 16(DX)
-	MOVOU X0, 32(DX)
-	MOVOU X0, 48(DX)
-	MOVOU X0, 64(DX)
-	MOVOU X0, 80(DX)
-	MOVOU X0, 96(DX)
-	MOVOU X0, 112(DX)
-	ADDQ  $0x80, DX
-	DECQ  CX
+	MOVOU X0, (BX)
+	MOVOU X0, 16(BX)
+	MOVOU X0, 32(BX)
+	MOVOU X0, 48(BX)
+	MOVOU X0, 64(BX)
+	MOVOU X0, 80(BX)
+	MOVOU X0, 96(BX)
+	MOVOU X0, 112(BX)
+	ADDQ  $0x80, BX
+	DECQ  DX
 	JNZ   zero_loop_calcBlockSizeSmall
 	MOVL  $0x00000000, 12(SP)
-	MOVQ  src_len+8(FP), CX
-	LEAQ  -9(CX), DX
-	LEAQ  -8(CX), BX
-	MOVL  BX, 8(SP)
-	SHRQ  $0x05, CX
-	SUBL  CX, DX
-	LEAQ  (AX)(DX*1), DX
-	MOVQ  DX, (SP)
-	MOVL  $0x00000001, CX
-	MOVL  CX, 16(SP)
-	MOVQ  src_base+0(FP), DX
+	MOVQ  src_len+8(FP), DX
+	LEAQ  -9(DX), BX
+	LEAQ  -8(DX), SI
+	MOVL  SI, 8(SP)
+	SHRQ  $0x05, DX
+	SUBL  DX, BX
+	LEAQ  (CX)(BX*1), BX
+	MOVQ  BX, (SP)
+	MOVL  $0x00000001, DX
+	MOVL  DX, 16(SP)
+	MOVQ  src_base+0(FP), BX
 
 search_loop_calcBlockSizeSmall:
-	MOVL  CX, BX
-	SUBL  12(SP), BX
-	SHRL  $0x04, BX
-	LEAL  4(CX)(BX*1), BX
-	CMPL  BX, 8(SP)
+	MOVL  DX, SI
+	SUBL  12(SP), SI
+	SHRL  $0x04, SI
+	LEAL  4(DX)(SI*1), SI
+	CMPL  SI, 8(SP)
 	JAE   emit_remainder_calcBlockSizeSmall
-	MOVQ  (DX)(CX*1), SI
-	MOVL  BX, 20(SP)
-	MOVQ  $0x9e3779b1, R8
-	MOVQ  SI, R9
-	MOVQ  SI, R10
-	SHRQ  $0x08, R10
-	SHLQ  $0x20, R9
-	IMULQ R8, R9
-	SHRQ  $0x37, R9
+	MOVQ  (BX)(DX*1), DI
+	MOVL  SI, 20(SP)
+	MOVQ  $0x9e3779b1, R9
+	MOVQ  DI, R10
+	MOVQ  DI, R11
+	SHRQ  $0x08, R11
 	SHLQ  $0x20, R10
-	IMULQ R8, R10
+	IMULQ R9, R10
 	SHRQ  $0x37, R10
-	MOVL  24(SP)(R9*4), BX
-	MOVL  24(SP)(R10*4), DI
-	MOVL  CX, 24(SP)(R9*4)
-	LEAL  1(CX), R9
-	MOVL  R9, 24(SP)(R10*4)
-	MOVQ  SI, R9
-	SHRQ  $0x10, R9
-	SHLQ  $0x20, R9
-	IMULQ R8, R9
-	SHRQ  $0x37, R9
-	MOVL  CX, R8
-	SUBL  16(SP), R8
-	MOVL  1(DX)(R8*1), R10
-	MOVQ  SI, R8
-	SHRQ  $0x08, R8
-	CMPL  R8, R10
+	SHLQ  $0x20, R11
+	IMULQ R9, R11
+	SHRQ  $0x37, R11
+	MOVL  (AX)(R10*4), SI
+	MOVL  (AX)(R11*4), R8
+	MOVL  DX, (AX)(R10*4)
+	LEAL  1(DX), R10
+	MOVL  R10, (AX)(R11*4)
+	MOVQ  DI, R10
+	SHRQ  $0x10, R10
+	SHLQ  $0x20, R10
+	IMULQ R9, R10
+	SHRQ  $0x37, R10
+	MOVL  DX, R9
+	SUBL  16(SP), R9
+	MOVL  1(BX)(R9*1), R11
+	MOVQ  DI, R9
+	SHRQ  $0x08, R9
+	CMPL  R9, R11
 	JNE   no_repeat_found_calcBlockSizeSmall
-	LEAL  1(CX), SI
-	MOVL  12(SP), BX
-	MOVL  SI, DI
-	SUBL  16(SP), DI
+	LEAL  1(DX), DI
+	MOVL  12(SP), SI
+	MOVL  DI, R8
+	SUBL  16(SP), R8
 	JZ    repeat_extend_back_end_calcBlockSizeSmall
 
 repeat_extend_back_loop_calcBlockSizeSmall:
-	CMPL SI, BX
+	CMPL DI, SI
 	JBE  repeat_extend_back_end_calcBlockSizeSmall
-	MOVB -1(DX)(DI*1), R8
-	MOVB -1(DX)(SI*1), R9
-	CMPB R8, R9
+	MOVB -1(BX)(R8*1), R9
+	MOVB -1(BX)(DI*1), R10
+	CMPB R9, R10
 	JNE  repeat_extend_back_end_calcBlockSizeSmall
-	LEAL -1(SI), SI
-	DECL DI
+	LEAL -1(DI), DI
+	DECL R8
 	JNZ  repeat_extend_back_loop_calcBlockSizeSmall
 
 repeat_extend_back_end_calcBlockSizeSmall:
-	MOVL SI, BX
-	SUBL 12(SP), BX
-	LEAQ 3(AX)(BX*1), BX
-	CMPQ BX, (SP)
+	MOVL DI, SI
+	SUBL 12(SP), SI
+	LEAQ 3(CX)(SI*1), SI
+	CMPQ SI, (SP)
 	JB   repeat_dst_size_check_calcBlockSizeSmall
-	MOVQ $0x00000000, ret+24(FP)
+	MOVQ $0x00000000, ret+32(FP)
 	RET
 
 repeat_dst_size_check_calcBlockSizeSmall:
-	MOVL 12(SP), BX
-	CMPL BX, SI
+	MOVL 12(SP), SI
+	CMPL SI, DI
 	JEQ  emit_literal_done_repeat_emit_calcBlockSizeSmall
-	MOVL SI, DI
-	MOVL SI, 12(SP)
-	LEAQ (DX)(BX*1), R8
-	SUBL BX, DI
-	LEAL -1(DI), BX
-	CMPL BX, $0x3c
+	MOVL DI, R8
+	MOVL DI, 12(SP)
+	LEAQ (BX)(SI*1), R9
+	SUBL SI, R8
+	LEAL -1(R8), SI
+	CMPL SI, $0x3c
 	JB   one_byte_repeat_emit_calcBlockSizeSmall
-	CMPL BX, $0x00000100
+	CMPL SI, $0x00000100
 	JB   two_bytes_repeat_emit_calcBlockSizeSmall
 	JB   three_bytes_repeat_emit_calcBlockSizeSmall
 
 three_bytes_repeat_emit_calcBlockSizeSmall:
-	ADDQ $0x03, AX
+	ADDQ $0x03, CX
 	JMP  memmove_long_repeat_emit_calcBlockSizeSmall
 
 two_bytes_repeat_emit_calcBlockSizeSmall:
-	ADDQ $0x02, AX
-	CMPL BX, $0x40
+	ADDQ $0x02, CX
+	CMPL SI, $0x40
 	JB   memmove_repeat_emit_calcBlockSizeSmall
 	JMP  memmove_long_repeat_emit_calcBlockSizeSmall
 
 one_byte_repeat_emit_calcBlockSizeSmall:
-	ADDQ $0x01, AX
+	ADDQ $0x01, CX
 
 memmove_repeat_emit_calcBlockSizeSmall:
-	LEAQ (AX)(DI*1), AX
+	LEAQ (CX)(R8*1), CX
 	JMP  emit_literal_done_repeat_emit_calcBlockSizeSmall
 
 memmove_long_repeat_emit_calcBlockSizeSmall:
-	LEAQ (AX)(DI*1), AX
+	LEAQ (CX)(R8*1), CX
 
 emit_literal_done_repeat_emit_calcBlockSizeSmall:
-	ADDL $0x05, CX
-	MOVL CX, BX
-	SUBL 16(SP), BX
-	MOVQ src_len+8(FP), DI
-	SUBL CX, DI
-	LEAQ (DX)(CX*1), R8
-	LEAQ (DX)(BX*1), BX
+	ADDL $0x05, DX
+	MOVL DX, SI
+	SUBL 16(SP), SI
+	MOVQ src_len+8(FP), R8
+	SUBL DX, R8
+	LEAQ (BX)(DX*1), R9
+	LEAQ (BX)(SI*1), SI
 
 	// matchLen
-	XORL R10, R10
+	XORL R11, R11
 
 matchlen_loopback_16_repeat_extend_calcBlockSizeSmall:
-	CMPL DI, $0x10
+	CMPL R8, $0x10
 	JB   matchlen_match8_repeat_extend_calcBlockSizeSmall
-	MOVQ (R8)(R10*1), R9
-	MOVQ 8(R8)(R10*1), R11
-	XORQ (BX)(R10*1), R9
+	MOVQ (R9)(R11*1), R10
+	MOVQ 8(R9)(R11*1), R12
+	XORQ (SI)(R11*1), R10
 	JNZ  matchlen_bsf_8_repeat_extend_calcBlockSizeSmall
-	XORQ 8(BX)(R10*1), R11
+	XORQ 8(SI)(R11*1), R12
 	JNZ  matchlen_bsf_16repeat_extend_calcBlockSizeSmall
-	LEAL -16(DI), DI
-	LEAL 16(R10), R10
+	LEAL -16(R8), R8
+	LEAL 16(R11), R11
 	JMP  matchlen_loopback_16_repeat_extend_calcBlockSizeSmall
 
 matchlen_bsf_16repeat_extend_calcBlockSizeSmall:
 #ifdef GOAMD64_v3
-	TZCNTQ R11, R11
+	TZCNTQ R12, R12
 
 #else
-	BSFQ R11, R11
+	BSFQ R12, R12
 
 #endif
-	SARQ $0x03, R11
-	LEAL 8(R10)(R11*1), R10
+	SARQ $0x03, R12
+	LEAL 8(R11)(R12*1), R11
 	JMP  repeat_extend_forward_end_calcBlockSizeSmall
 
 matchlen_match8_repeat_extend_calcBlockSizeSmall:
-	CMPL DI, $0x08
+	CMPL R8, $0x08
 	JB   matchlen_match4_repeat_extend_calcBlockSizeSmall
-	MOVQ (R8)(R10*1), R9
-	XORQ (BX)(R10*1), R9
+	MOVQ (R9)(R11*1), R10
+	XORQ (SI)(R11*1), R10
 	JNZ  matchlen_bsf_8_repeat_extend_calcBlockSizeSmall
-	LEAL -8(DI), DI
-	LEAL 8(R10), R10
+	LEAL -8(R8), R8
+	LEAL 8(R11), R11
 	JMP  matchlen_match4_repeat_extend_calcBlockSizeSmall
 
 matchlen_bsf_8_repeat_extend_calcBlockSizeSmall:
 #ifdef GOAMD64_v3
-	TZCNTQ R9, R9
+	TZCNTQ R10, R10
 
 #else
-	BSFQ R9, R9
+	BSFQ R10, R10
 
 #endif
-	SARQ $0x03, R9
-	LEAL (R10)(R9*1), R10
+	SARQ $0x03, R10
+	LEAL (R11)(R10*1), R11
 	JMP  repeat_extend_forward_end_calcBlockSizeSmall
 
 matchlen_match4_repeat_extend_calcBlockSizeSmall:
-	CMPL DI, $0x04
+	CMPL R8, $0x04
 	JB   matchlen_match2_repeat_extend_calcBlockSizeSmall
-	MOVL (R8)(R10*1), R9
-	CMPL (BX)(R10*1), R9
+	MOVL (R9)(R11*1), R10
+	CMPL (SI)(R11*1), R10
 	JNE  matchlen_match2_repeat_extend_calcBlockSizeSmall
-	LEAL -4(DI), DI
-	LEAL 4(R10), R10
+	LEAL -4(R8), R8
+	LEAL 4(R11), R11
 
 matchlen_match2_repeat_extend_calcBlockSizeSmall:
-	CMPL DI, $0x01
+	CMPL R8, $0x01
 	JE   matchlen_match1_repeat_extend_calcBlockSizeSmall
 	JB   repeat_extend_forward_end_calcBlockSizeSmall
-	MOVW (R8)(R10*1), R9
-	CMPW (BX)(R10*1), R9
+	MOVW (R9)(R11*1), R10
+	CMPW (SI)(R11*1), R10
 	JNE  matchlen_match1_repeat_extend_calcBlockSizeSmall
-	LEAL 2(R10), R10
-	SUBL $0x02, DI
+	LEAL 2(R11), R11
+	SUBL $0x02, R8
 	JZ   repeat_extend_forward_end_calcBlockSizeSmall
 
 matchlen_match1_repeat_extend_calcBlockSizeSmall:
-	MOVB (R8)(R10*1), R9
-	CMPB (BX)(R10*1), R9
+	MOVB (R9)(R11*1), R10
+	CMPB (SI)(R11*1), R10
 	JNE  repeat_extend_forward_end_calcBlockSizeSmall
-	LEAL 1(R10), R10
+	LEAL 1(R11), R11
 
 repeat_extend_forward_end_calcBlockSizeSmall:
-	ADDL R10, CX
-	MOVL CX, BX
-	SUBL SI, BX
-	MOVL 16(SP), SI
+	ADDL R11, DX
+	MOVL DX, SI
+	SUBL DI, SI
+	MOVL 16(SP), DI
 
 	// emitCopy
 two_byte_offset_repeat_as_copy_calcBlockSizeSmall:
-	CMPL BX, $0x40
+	CMPL SI, $0x40
 	JBE  two_byte_offset_short_repeat_as_copy_calcBlockSizeSmall
-	LEAL -60(BX), BX
-	ADDQ $0x03, AX
+	LEAL -60(SI), SI
+	ADDQ $0x03, CX
 	JMP  two_byte_offset_repeat_as_copy_calcBlockSizeSmall
 
 two_byte_offset_short_repeat_as_copy_calcBlockSizeSmall:
-	MOVL BX, SI
-	SHLL $0x02, SI
-	CMPL BX, $0x0c
+	MOVL SI, DI
+	SHLL $0x02, DI
+	CMPL SI, $0x0c
 	JAE  emit_copy_three_repeat_as_copy_calcBlockSizeSmall
-	ADDQ $0x02, AX
+	ADDQ $0x02, CX
 	JMP  repeat_end_emit_calcBlockSizeSmall
 
 emit_copy_three_repeat_as_copy_calcBlockSizeSmall:
-	ADDQ $0x03, AX
+	ADDQ $0x03, CX
 
 repeat_end_emit_calcBlockSizeSmall:
-	MOVL CX, 12(SP)
+	MOVL DX, 12(SP)
 	JMP  search_loop_calcBlockSizeSmall
 
 no_repeat_found_calcBlockSizeSmall:
-	CMPL (DX)(BX*1), SI
+	CMPL (BX)(SI*1), DI
 	JEQ  candidate_match_calcBlockSizeSmall
-	SHRQ $0x08, SI
-	MOVL 24(SP)(R9*4), BX
-	LEAL 2(CX), R8
-	CMPL (DX)(DI*1), SI
+	SHRQ $0x08, DI
+	MOVL (AX)(R10*4), SI
+	LEAL 2(DX), R9
+	CMPL (BX)(R8*1), DI
 	JEQ  candidate2_match_calcBlockSizeSmall
-	MOVL R8, 24(SP)(R9*4)
-	SHRQ $0x08, SI
-	CMPL (DX)(BX*1), SI
+	MOVL R9, (AX)(R10*4)
+	SHRQ $0x08, DI
+	CMPL (BX)(SI*1), DI
 	JEQ  candidate3_match_calcBlockSizeSmall
-	MOVL 20(SP), CX
+	MOVL 20(SP), DX
 	JMP  search_loop_calcBlockSizeSmall
 
 candidate3_match_calcBlockSizeSmall:
-	ADDL $0x02, CX
+	ADDL $0x02, DX
 	JMP  candidate_match_calcBlockSizeSmall
 
 candidate2_match_calcBlockSizeSmall:
-	MOVL R8, 24(SP)(R9*4)
-	INCL CX
-	MOVL DI, BX
+	MOVL R9, (AX)(R10*4)
+	INCL DX
+	MOVL R8, SI
 
 candidate_match_calcBlockSizeSmall:
-	MOVL  12(SP), SI
-	TESTL BX, BX
+	MOVL  12(SP), DI
+	TESTL SI, SI
 	JZ    match_extend_back_end_calcBlockSizeSmall
 
 match_extend_back_loop_calcBlockSizeSmall:
-	CMPL CX, SI
+	CMPL DX, DI
 	JBE  match_extend_back_end_calcBlockSizeSmall
-	MOVB -1(DX)(BX*1), DI
-	MOVB -1(DX)(CX*1), R8
-	CMPB DI, R8
+	MOVB -1(BX)(SI*1), R8
+	MOVB -1(BX)(DX*1), R9
+	CMPB R8, R9
 	JNE  match_extend_back_end_calcBlockSizeSmall
-	LEAL -1(CX), CX
-	DECL BX
+	LEAL -1(DX), DX
+	DECL SI
 	JZ   match_extend_back_end_calcBlockSizeSmall
 	JMP  match_extend_back_loop_calcBlockSizeSmall
 
 match_extend_back_end_calcBlockSizeSmall:
-	MOVL CX, SI
-	SUBL 12(SP), SI
-	LEAQ 3(AX)(SI*1), SI
-	CMPQ SI, (SP)
+	MOVL DX, DI
+	SUBL 12(SP), DI
+	LEAQ 3(CX)(DI*1), DI
+	CMPQ DI, (SP)
 	JB   match_dst_size_check_calcBlockSizeSmall
-	MOVQ $0x00000000, ret+24(FP)
+	MOVQ $0x00000000, ret+32(FP)
 	RET
 
 match_dst_size_check_calcBlockSizeSmall:
-	MOVL CX, SI
-	MOVL 12(SP), DI
-	CMPL DI, SI
+	MOVL DX, DI
+	MOVL 12(SP), R8
+	CMPL R8, DI
 	JEQ  emit_literal_done_match_emit_calcBlockSizeSmall
-	MOVL SI, R8
-	MOVL SI, 12(SP)
-	LEAQ (DX)(DI*1), SI
-	SUBL DI, R8
-	LEAL -1(R8), SI
-	CMPL SI, $0x3c
+	MOVL DI, R9
+	MOVL DI, 12(SP)
+	LEAQ (BX)(R8*1), DI
+	SUBL R8, R9
+	LEAL -1(R9), DI
+	CMPL DI, $0x3c
 	JB   one_byte_match_emit_calcBlockSizeSmall
-	CMPL SI, $0x00000100
+	CMPL DI, $0x00000100
 	JB   two_bytes_match_emit_calcBlockSizeSmall
 	JB   three_bytes_match_emit_calcBlockSizeSmall
 
 three_bytes_match_emit_calcBlockSizeSmall:
-	ADDQ $0x03, AX
+	ADDQ $0x03, CX
 	JMP  memmove_long_match_emit_calcBlockSizeSmall
 
 two_bytes_match_emit_calcBlockSizeSmall:
-	ADDQ $0x02, AX
-	CMPL SI, $0x40
+	ADDQ $0x02, CX
+	CMPL DI, $0x40
 	JB   memmove_match_emit_calcBlockSizeSmall
 	JMP  memmove_long_match_emit_calcBlockSizeSmall
 
 one_byte_match_emit_calcBlockSizeSmall:
-	ADDQ $0x01, AX
+	ADDQ $0x01, CX
 
 memmove_match_emit_calcBlockSizeSmall:
-	LEAQ (AX)(R8*1), AX
+	LEAQ (CX)(R9*1), CX
 	JMP  emit_literal_done_match_emit_calcBlockSizeSmall
 
 memmove_long_match_emit_calcBlockSizeSmall:
-	LEAQ (AX)(R8*1), AX
+	LEAQ (CX)(R9*1), CX
 
 emit_literal_done_match_emit_calcBlockSizeSmall:
 match_nolit_loop_calcBlockSizeSmall:
-	MOVL CX, SI
-	SUBL BX, SI
-	MOVL SI, 16(SP)
-	ADDL $0x04, CX
-	ADDL $0x04, BX
-	MOVQ src_len+8(FP), SI
-	SUBL CX, SI
-	LEAQ (DX)(CX*1), DI
-	LEAQ (DX)(BX*1), BX
+	MOVL DX, DI
+	SUBL SI, DI
+	MOVL DI, 16(SP)
+	ADDL $0x04, DX
+	ADDL $0x04, SI
+	MOVQ src_len+8(FP), DI
+	SUBL DX, DI
+	LEAQ (BX)(DX*1), R8
+	LEAQ (BX)(SI*1), SI
 
 	// matchLen
-	XORL R9, R9
+	XORL R10, R10
 
 matchlen_loopback_16_match_nolit_calcBlockSizeSmall:
-	CMPL SI, $0x10
+	CMPL DI, $0x10
 	JB   matchlen_match8_match_nolit_calcBlockSizeSmall
-	MOVQ (DI)(R9*1), R8
-	MOVQ 8(DI)(R9*1), R10
-	XORQ (BX)(R9*1), R8
+	MOVQ (R8)(R10*1), R9
+	MOVQ 8(R8)(R10*1), R11
+	XORQ (SI)(R10*1), R9
 	JNZ  matchlen_bsf_8_match_nolit_calcBlockSizeSmall
-	XORQ 8(BX)(R9*1), R10
+	XORQ 8(SI)(R10*1), R11
 	JNZ  matchlen_bsf_16match_nolit_calcBlockSizeSmall
-	LEAL -16(SI), SI
-	LEAL 16(R9), R9
+	LEAL -16(DI), DI
+	LEAL 16(R10), R10
 	JMP  matchlen_loopback_16_match_nolit_calcBlockSizeSmall
 
 matchlen_bsf_16match_nolit_calcBlockSizeSmall:
 #ifdef GOAMD64_v3
-	TZCNTQ R10, R10
+	TZCNTQ R11, R11
 
 #else
-	BSFQ R10, R10
+	BSFQ R11, R11
 
 #endif
-	SARQ $0x03, R10
-	LEAL 8(R9)(R10*1), R9
+	SARQ $0x03, R11
+	LEAL 8(R10)(R11*1), R10
 	JMP  match_nolit_end_calcBlockSizeSmall
 
 matchlen_match8_match_nolit_calcBlockSizeSmall:
-	CMPL SI, $0x08
+	CMPL DI, $0x08
 	JB   matchlen_match4_match_nolit_calcBlockSizeSmall
-	MOVQ (DI)(R9*1), R8
-	XORQ (BX)(R9*1), R8
+	MOVQ (R8)(R10*1), R9
+	XORQ (SI)(R10*1), R9
 	JNZ  matchlen_bsf_8_match_nolit_calcBlockSizeSmall
-	LEAL -8(SI), SI
-	LEAL 8(R9), R9
+	LEAL -8(DI), DI
+	LEAL 8(R10), R10
 	JMP  matchlen_match4_match_nolit_calcBlockSizeSmall
 
 matchlen_bsf_8_match_nolit_calcBlockSizeSmall:
 #ifdef GOAMD64_v3
-	TZCNTQ R8, R8
+	TZCNTQ R9, R9
 
 #else
-	BSFQ R8, R8
+	BSFQ R9, R9
 
 #endif
-	SARQ $0x03, R8
-	LEAL (R9)(R8*1), R9
+	SARQ $0x03, R9
+	LEAL (R10)(R9*1), R10
 	JMP  match_nolit_end_calcBlockSizeSmall
 
 matchlen_match4_match_nolit_calcBlockSizeSmall:
-	CMPL SI, $0x04
+	CMPL DI, $0x04
 	JB   matchlen_match2_match_nolit_calcBlockSizeSmall
-	MOVL (DI)(R9*1), R8
-	CMPL (BX)(R9*1), R8
+	MOVL (R8)(R10*1), R9
+	CMPL (SI)(R10*1), R9
 	JNE  matchlen_match2_match_nolit_calcBlockSizeSmall
-	LEAL -4(SI), SI
-	LEAL 4(R9), R9
+	LEAL -4(DI), DI
+	LEAL 4(R10), R10
 
 matchlen_match2_match_nolit_calcBlockSizeSmall:
-	CMPL SI, $0x01
+	CMPL DI, $0x01
 	JE   matchlen_match1_match_nolit_calcBlockSizeSmall
 	JB   match_nolit_end_calcBlockSizeSmall
-	MOVW (DI)(R9*1), R8
-	CMPW (BX)(R9*1), R8
+	MOVW (R8)(R10*1), R9
+	CMPW (SI)(R10*1), R9
 	JNE  matchlen_match1_match_nolit_calcBlockSizeSmall
-	LEAL 2(R9), R9
-	SUBL $0x02, SI
+	LEAL 2(R10), R10
+	SUBL $0x02, DI
 	JZ   match_nolit_end_calcBlockSizeSmall
 
 matchlen_match1_match_nolit_calcBlockSizeSmall:
-	MOVB (DI)(R9*1), R8
-	CMPB (BX)(R9*1), R8
+	MOVB (R8)(R10*1), R9
+	CMPB (SI)(R10*1), R9
 	JNE  match_nolit_end_calcBlockSizeSmall
-	LEAL 1(R9), R9
+	LEAL 1(R10), R10
 
 match_nolit_end_calcBlockSizeSmall:
-	ADDL R9, CX
-	MOVL 16(SP), BX
-	ADDL $0x04, R9
-	MOVL CX, 12(SP)
+	ADDL R10, DX
+	MOVL 16(SP), SI
+	ADDL $0x04, R10
+	MOVL DX, 12(SP)
 
 	// emitCopy
 two_byte_offset_match_nolit_calcBlockSizeSmall:
-	CMPL R9, $0x40
+	CMPL R10, $0x40
 	JBE  two_byte_offset_short_match_nolit_calcBlockSizeSmall
-	LEAL -60(R9), R9
-	ADDQ $0x03, AX
+	LEAL -60(R10), R10
+	ADDQ $0x03, CX
 	JMP  two_byte_offset_match_nolit_calcBlockSizeSmall
 
 two_byte_offset_short_match_nolit_calcBlockSizeSmall:
-	MOVL R9, BX
-	SHLL $0x02, BX
-	CMPL R9, $0x0c
+	MOVL R10, SI
+	SHLL $0x02, SI
+	CMPL R10, $0x0c
 	JAE  emit_copy_three_match_nolit_calcBlockSizeSmall
-	ADDQ $0x02, AX
+	ADDQ $0x02, CX
 	JMP  match_nolit_emitcopy_end_calcBlockSizeSmall
 
 emit_copy_three_match_nolit_calcBlockSizeSmall:
-	ADDQ $0x03, AX
+	ADDQ $0x03, CX
 
 match_nolit_emitcopy_end_calcBlockSizeSmall:
-	CMPL CX, 8(SP)
+	CMPL DX, 8(SP)
 	JAE  emit_remainder_calcBlockSizeSmall
-	MOVQ -2(DX)(CX*1), SI
-	CMPQ AX, (SP)
+	MOVQ -2(BX)(DX*1), DI
+	CMPQ CX, (SP)
 	JB   match_nolit_dst_ok_calcBlockSizeSmall
-	MOVQ $0x00000000, ret+24(FP)
+	MOVQ $0x00000000, ret+32(FP)
 	RET
 
 match_nolit_dst_ok_calcBlockSizeSmall:
-	MOVQ  $0x9e3779b1, R8
-	MOVQ  SI, DI
-	SHRQ  $0x10, SI
-	MOVQ  SI, BX
-	SHLQ  $0x20, DI
-	IMULQ R8, DI
-	SHRQ  $0x37, DI
-	SHLQ  $0x20, BX
-	IMULQ R8, BX
-	SHRQ  $0x37, BX
-	LEAL  -2(CX), R8
-	LEAQ  24(SP)(BX*4), R9
-	MOVL  (R9), BX
-	MOVL  R8, 24(SP)(DI*4)
-	MOVL  CX, (R9)
-	CMPL  (DX)(BX*1), SI
+	MOVQ  $0x9e3779b1, R9
+	MOVQ  DI, R8
+	SHRQ  $0x10, DI
+	MOVQ  DI, SI
+	SHLQ  $0x20, R8
+	IMULQ R9, R8
+	SHRQ  $0x37, R8
+	SHLQ  $0x20, SI
+	IMULQ R9, SI
+	SHRQ  $0x37, SI
+	LEAL  -2(DX), R9
+	LEAQ  (AX)(SI*4), R10
+	MOVL  (R10), SI
+	MOVL  R9, (AX)(R8*4)
+	MOVL  DX, (R10)
+	CMPL  (BX)(SI*1), DI
 	JEQ   match_nolit_loop_calcBlockSizeSmall
-	INCL  CX
+	INCL  DX
 	JMP   search_loop_calcBlockSizeSmall
 
 emit_remainder_calcBlockSizeSmall:
-	MOVQ src_len+8(FP), CX
-	SUBL 12(SP), CX
-	LEAQ 3(AX)(CX*1), CX
-	CMPQ CX, (SP)
+	MOVQ src_len+8(FP), AX
+	SUBL 12(SP), AX
+	LEAQ 3(CX)(AX*1), AX
+	CMPQ AX, (SP)
 	JB   emit_remainder_ok_calcBlockSizeSmall
-	MOVQ $0x00000000, ret+24(FP)
+	MOVQ $0x00000000, ret+32(FP)
 	RET
 
 emit_remainder_ok_calcBlockSizeSmall:
-	MOVQ src_len+8(FP), CX
-	MOVL 12(SP), BX
-	CMPL BX, CX
+	MOVQ src_len+8(FP), AX
+	MOVL 12(SP), DX
+	CMPL DX, AX
 	JEQ  emit_literal_done_emit_remainder_calcBlockSizeSmall
-	MOVL CX, SI
-	MOVL CX, 12(SP)
-	LEAQ (DX)(BX*1), CX
-	SUBL BX, SI
-	LEAL -1(SI), CX
-	CMPL CX, $0x3c
+	MOVL AX, SI
+	MOVL AX, 12(SP)
+	LEAQ (BX)(DX*1), AX
+	SUBL DX, SI
+	LEAL -1(SI), AX
+	CMPL AX, $0x3c
 	JB   one_byte_emit_remainder_calcBlockSizeSmall
-	CMPL CX, $0x00000100
+	CMPL AX, $0x00000100
 	JB   two_bytes_emit_remainder_calcBlockSizeSmall
 	JB   three_bytes_emit_remainder_calcBlockSizeSmall
 
 three_bytes_emit_remainder_calcBlockSizeSmall:
-	ADDQ $0x03, AX
+	ADDQ $0x03, CX
 	JMP  memmove_long_emit_remainder_calcBlockSizeSmall
 
 two_bytes_emit_remainder_calcBlockSizeSmall:
-	ADDQ $0x02, AX
-	CMPL CX, $0x40
+	ADDQ $0x02, CX
+	CMPL AX, $0x40
 	JB   memmove_emit_remainder_calcBlockSizeSmall
 	JMP  memmove_long_emit_remainder_calcBlockSizeSmall
 
 one_byte_emit_remainder_calcBlockSizeSmall:
-	ADDQ $0x01, AX
+	ADDQ $0x01, CX
 
 memmove_emit_remainder_calcBlockSizeSmall:
-	LEAQ (AX)(SI*1), AX
+	LEAQ (CX)(SI*1), AX
+	MOVQ AX, CX
 	JMP  emit_literal_done_emit_remainder_calcBlockSizeSmall
 
 memmove_long_emit_remainder_calcBlockSizeSmall:
-	LEAQ (AX)(SI*1), AX
+	LEAQ (CX)(SI*1), AX
+	MOVQ AX, CX
 
 emit_literal_done_emit_remainder_calcBlockSizeSmall:
-	MOVQ AX, ret+24(FP)
+	MOVQ CX, ret+32(FP)
 	RET
 
 // func emitLiteral(dst []byte, lit []byte) int
@@ -19783,7 +19809,7 @@ TEXT ·cvtLZ4BlockAsm(SB), NOSPLIT, $0-64
 	MOVQ src_base+24(FP), DX
 	MOVQ src_len+32(FP), BX
 	LEAQ (DX)(BX*1), BX
-	LEAQ -10(AX)(CX*1), CX
+	LEAQ -8(AX)(CX*1), CX
 	XORQ DI, DI
 
 lz4_s2_loop:
@@ -20266,7 +20292,7 @@ TEXT ·cvtLZ4sBlockAsm(SB), NOSPLIT, $0-64
 	MOVQ src_base+24(FP), DX
 	MOVQ src_len+32(FP), BX
 	LEAQ (DX)(BX*1), BX
-	LEAQ -10(AX)(CX*1), CX
+	LEAQ -8(AX)(CX*1), CX
 	XORQ DI, DI
 
 lz4s_s2_loop:
@@ -20751,7 +20777,7 @@ TEXT ·cvtLZ4BlockSnappyAsm(SB), NOSPLIT, $0-64
 	MOVQ src_base+24(FP), DX
 	MOVQ src_len+32(FP), BX
 	LEAQ (DX)(BX*1), BX
-	LEAQ -10(AX)(CX*1), CX
+	LEAQ -8(AX)(CX*1), CX
 
 lz4_snappy_loop:
 	CMPQ    DX, BX
@@ -21017,7 +21043,7 @@ TEXT ·cvtLZ4sBlockSnappyAsm(SB), NOSPLIT, $0-64
 	MOVQ src_base+24(FP), DX
 	MOVQ src_len+32(FP), BX
 	LEAQ (DX)(BX*1), BX
-	LEAQ -10(AX)(CX*1), CX
+	LEAQ -8(AX)(CX*1), CX
 
 lz4s_snappy_loop:
 	CMPQ    DX, BX
diff --git a/vendor/github.com/klauspost/compress/s2/writer.go b/vendor/github.com/klauspost/compress/s2/writer.go
index 0a46f2b9..fd15078f 100644
--- a/vendor/github.com/klauspost/compress/s2/writer.go
+++ b/vendor/github.com/klauspost/compress/s2/writer.go
@@ -83,11 +83,14 @@ type Writer struct {
 	snappy            bool
 	flushOnWrite      bool
 	appendIndex       bool
+	bufferCB          func([]byte)
 	level             uint8
 }
 
 type result struct {
 	b []byte
+	// ret, when non-nil, is the caller's buffer to pass to bufferCB once this result reaches the writer
+	ret []byte
 	// Uncompressed start offset
 	startOffset int64
 }
@@ -146,6 +149,10 @@ func (w *Writer) Reset(writer io.Writer) {
 		for write := range toWrite {
 			// Wait for the data to be available.
 			input := <-write
+			if input.ret != nil && w.bufferCB != nil {
+				w.bufferCB(input.ret)
+				input.ret = nil
+			}
 			in := input.b
 			if len(in) > 0 {
 				if w.err(nil) == nil {
@@ -341,7 +348,8 @@ func (w *Writer) AddSkippableBlock(id uint8, data []byte) (err error) {
 // but the input buffer cannot be written to by the caller
 // until Flush or Close has been called when concurrency != 1.
 //
-// If you cannot control that, use the regular Write function.
+// Use the WriterBufferDone option to receive a callback when the buffer is done
+// processing.
 //
 // Note that input is not buffered.
 // This means that each write will result in discrete blocks being created.
@@ -364,6 +372,9 @@ func (w *Writer) EncodeBuffer(buf []byte) (err error) {
 	}
 	if w.concurrency == 1 {
 		_, err := w.writeSync(buf)
+		if w.bufferCB != nil {
+			w.bufferCB(buf)
+		}
 		return err
 	}
 
@@ -378,7 +389,7 @@ func (w *Writer) EncodeBuffer(buf []byte) (err error) {
 			hWriter <- result{startOffset: w.uncompWritten, b: magicChunkBytes}
 		}
 	}
-
+	orgBuf := buf
 	for len(buf) > 0 {
 		// Cut input.
 		uncompressed := buf
@@ -397,6 +408,9 @@ func (w *Writer) EncodeBuffer(buf []byte) (err error) {
 			startOffset: w.uncompWritten,
 		}
 		w.uncompWritten += int64(len(uncompressed))
+		if len(buf) == 0 && w.bufferCB != nil {
+			res.ret = orgBuf
+		}
 		go func() {
 			race.ReadSlice(uncompressed)
 
@@ -922,7 +936,7 @@ func WriterBetterCompression() WriterOption {
 }
 
 // WriterBestCompression will enable better compression.
-// EncodeBetter compresses better than Encode but typically with a
+// EncodeBest compresses better than Encode but typically with a
 // big speed decrease on compression.
 func WriterBestCompression() WriterOption {
 	return func(w *Writer) error {
@@ -941,6 +955,17 @@ func WriterUncompressed() WriterOption {
 	}
 }
 
+// WriterBufferDone will perform a callback when EncodeBuffer has finished
+// writing a buffer to the output and the buffer can safely be reused.
+// If the buffer was split into several blocks, it will be sent after the last block.
+// Callbacks will not be done concurrently.
+func WriterBufferDone(fn func(b []byte)) WriterOption {
+	return func(w *Writer) error {
+		w.bufferCB = fn
+		return nil
+	}
+}
+
 // WriterBlockSize allows to override the default block size.
 // Blocks will be this size or smaller.
 // Minimum size is 4KB and maximum size is 4MB.
diff --git a/vendor/github.com/klauspost/compress/zstd/blockdec.go b/vendor/github.com/klauspost/compress/zstd/blockdec.go
index 03744fbc..9c28840c 100644
--- a/vendor/github.com/klauspost/compress/zstd/blockdec.go
+++ b/vendor/github.com/klauspost/compress/zstd/blockdec.go
@@ -598,7 +598,9 @@ func (b *blockDec) prepareSequences(in []byte, hist *history) (err error) {
 					printf("RLE set to 0x%x, code: %v", symb, v)
 				}
 			case compModeFSE:
-				println("Reading table for", tableIndex(i))
+				if debugDecoder {
+					println("Reading table for", tableIndex(i))
+				}
 				if seq.fse == nil || seq.fse.preDefined {
 					seq.fse = fseDecoderPool.Get().(*fseDecoder)
 				}
diff --git a/vendor/github.com/klauspost/compress/zstd/enc_better.go b/vendor/github.com/klauspost/compress/zstd/enc_better.go
index a4f5bf91..84a79fde 100644
--- a/vendor/github.com/klauspost/compress/zstd/enc_better.go
+++ b/vendor/github.com/klauspost/compress/zstd/enc_better.go
@@ -179,9 +179,9 @@ encodeLoop:
 				if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
 					// Consider history as well.
 					var seq seq
-					lenght := 4 + e.matchlen(s+4+repOff, repIndex+4, src)
+					length := 4 + e.matchlen(s+4+repOff, repIndex+4, src)
 
-					seq.matchLen = uint32(lenght - zstdMinMatch)
+					seq.matchLen = uint32(length - zstdMinMatch)
 
 					// We might be able to match backwards.
 					// Extend as long as we can.
@@ -210,12 +210,12 @@ encodeLoop:
 
 					// Index match start+1 (long) -> s - 1
 					index0 := s + repOff
-					s += lenght + repOff
+					s += length + repOff
 
 					nextEmit = s
 					if s >= sLimit {
 						if debugEncoder {
-							println("repeat ended", s, lenght)
+							println("repeat ended", s, length)
 
 						}
 						break encodeLoop
@@ -241,9 +241,9 @@ encodeLoop:
 				if false && repIndex >= 0 && load6432(src, repIndex) == load6432(src, s+repOff) {
 					// Consider history as well.
 					var seq seq
-					lenght := 8 + e.matchlen(s+8+repOff2, repIndex+8, src)
+					length := 8 + e.matchlen(s+8+repOff2, repIndex+8, src)
 
-					seq.matchLen = uint32(lenght - zstdMinMatch)
+					seq.matchLen = uint32(length - zstdMinMatch)
 
 					// We might be able to match backwards.
 					// Extend as long as we can.
@@ -270,11 +270,11 @@ encodeLoop:
 					}
 					blk.sequences = append(blk.sequences, seq)
 
-					s += lenght + repOff2
+					s += length + repOff2
 					nextEmit = s
 					if s >= sLimit {
 						if debugEncoder {
-							println("repeat ended", s, lenght)
+							println("repeat ended", s, length)
 
 						}
 						break encodeLoop
@@ -708,9 +708,9 @@ encodeLoop:
 				if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
 					// Consider history as well.
 					var seq seq
-					lenght := 4 + e.matchlen(s+4+repOff, repIndex+4, src)
+					length := 4 + e.matchlen(s+4+repOff, repIndex+4, src)
 
-					seq.matchLen = uint32(lenght - zstdMinMatch)
+					seq.matchLen = uint32(length - zstdMinMatch)
 
 					// We might be able to match backwards.
 					// Extend as long as we can.
@@ -738,12 +738,12 @@ encodeLoop:
 					blk.sequences = append(blk.sequences, seq)
 
 					// Index match start+1 (long) -> s - 1
-					s += lenght + repOff
+					s += length + repOff
 
 					nextEmit = s
 					if s >= sLimit {
 						if debugEncoder {
-							println("repeat ended", s, lenght)
+							println("repeat ended", s, length)
 
 						}
 						break encodeLoop
@@ -772,9 +772,9 @@ encodeLoop:
 				if false && repIndex >= 0 && load6432(src, repIndex) == load6432(src, s+repOff) {
 					// Consider history as well.
 					var seq seq
-					lenght := 8 + e.matchlen(s+8+repOff2, repIndex+8, src)
+					length := 8 + e.matchlen(s+8+repOff2, repIndex+8, src)
 
-					seq.matchLen = uint32(lenght - zstdMinMatch)
+					seq.matchLen = uint32(length - zstdMinMatch)
 
 					// We might be able to match backwards.
 					// Extend as long as we can.
@@ -801,11 +801,11 @@ encodeLoop:
 					}
 					blk.sequences = append(blk.sequences, seq)
 
-					s += lenght + repOff2
+					s += length + repOff2
 					nextEmit = s
 					if s >= sLimit {
 						if debugEncoder {
-							println("repeat ended", s, lenght)
+							println("repeat ended", s, length)
 
 						}
 						break encodeLoop
diff --git a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go
index a154c18f..d36be7bd 100644
--- a/vendor/github.com/klauspost/compress/zstd/enc_dfast.go
+++ b/vendor/github.com/klauspost/compress/zstd/enc_dfast.go
@@ -138,9 +138,9 @@ encodeLoop:
 				if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
 					// Consider history as well.
 					var seq seq
-					lenght := 4 + e.matchlen(s+4+repOff, repIndex+4, src)
+					length := 4 + e.matchlen(s+4+repOff, repIndex+4, src)
 
-					seq.matchLen = uint32(lenght - zstdMinMatch)
+					seq.matchLen = uint32(length - zstdMinMatch)
 
 					// We might be able to match backwards.
 					// Extend as long as we can.
@@ -166,11 +166,11 @@ encodeLoop:
 						println("repeat sequence", seq, "next s:", s)
 					}
 					blk.sequences = append(blk.sequences, seq)
-					s += lenght + repOff
+					s += length + repOff
 					nextEmit = s
 					if s >= sLimit {
 						if debugEncoder {
-							println("repeat ended", s, lenght)
+							println("repeat ended", s, length)
 
 						}
 						break encodeLoop
@@ -798,9 +798,9 @@ encodeLoop:
 				if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
 					// Consider history as well.
 					var seq seq
-					lenght := 4 + e.matchlen(s+4+repOff, repIndex+4, src)
+					length := 4 + e.matchlen(s+4+repOff, repIndex+4, src)
 
-					seq.matchLen = uint32(lenght - zstdMinMatch)
+					seq.matchLen = uint32(length - zstdMinMatch)
 
 					// We might be able to match backwards.
 					// Extend as long as we can.
@@ -826,11 +826,11 @@ encodeLoop:
 						println("repeat sequence", seq, "next s:", s)
 					}
 					blk.sequences = append(blk.sequences, seq)
-					s += lenght + repOff
+					s += length + repOff
 					nextEmit = s
 					if s >= sLimit {
 						if debugEncoder {
-							println("repeat ended", s, lenght)
+							println("repeat ended", s, length)
 
 						}
 						break encodeLoop
diff --git a/vendor/github.com/klauspost/compress/zstd/encoder.go b/vendor/github.com/klauspost/compress/zstd/encoder.go
index 72af7ef0..8f8223cd 100644
--- a/vendor/github.com/klauspost/compress/zstd/encoder.go
+++ b/vendor/github.com/klauspost/compress/zstd/encoder.go
@@ -6,6 +6,7 @@ package zstd
 
 import (
 	"crypto/rand"
+	"errors"
 	"fmt"
 	"io"
 	"math"
@@ -149,6 +150,9 @@ func (e *Encoder) ResetContentSize(w io.Writer, size int64) {
 // and write CRC if requested.
 func (e *Encoder) Write(p []byte) (n int, err error) {
 	s := &e.state
+	if s.eofWritten {
+		return 0, ErrEncoderClosed
+	}
 	for len(p) > 0 {
 		if len(p)+len(s.filling) < e.o.blockSize {
 			if e.o.crc {
@@ -202,7 +206,7 @@ func (e *Encoder) nextBlock(final bool) error {
 			return nil
 		}
 		if final && len(s.filling) > 0 {
-			s.current = e.EncodeAll(s.filling, s.current[:0])
+			s.current = e.encodeAll(s.encoder, s.filling, s.current[:0])
 			var n2 int
 			n2, s.err = s.w.Write(s.current)
 			if s.err != nil {
@@ -288,6 +292,9 @@ func (e *Encoder) nextBlock(final bool) error {
 	s.filling, s.current, s.previous = s.previous[:0], s.filling, s.current
 	s.nInput += int64(len(s.current))
 	s.wg.Add(1)
+	if final {
+		s.eofWritten = true
+	}
 	go func(src []byte) {
 		if debugEncoder {
 			println("Adding block,", len(src), "bytes, final:", final)
@@ -303,9 +310,6 @@ func (e *Encoder) nextBlock(final bool) error {
 		blk := enc.Block()
 		enc.Encode(blk, src)
 		blk.last = final
-		if final {
-			s.eofWritten = true
-		}
 		// Wait for pending writes.
 		s.wWg.Wait()
 		if s.writeErr != nil {
@@ -401,12 +405,20 @@ func (e *Encoder) Flush() error {
 	if len(s.filling) > 0 {
 		err := e.nextBlock(false)
 		if err != nil {
+			// Ignore Flush after Close.
+			if errors.Is(s.err, ErrEncoderClosed) {
+				return nil
+			}
 			return err
 		}
 	}
 	s.wg.Wait()
 	s.wWg.Wait()
 	if s.err != nil {
+		// Ignore Flush after Close.
+		if errors.Is(s.err, ErrEncoderClosed) {
+			return nil
+		}
 		return s.err
 	}
 	return s.writeErr
@@ -422,6 +434,9 @@ func (e *Encoder) Close() error {
 	}
 	err := e.nextBlock(true)
 	if err != nil {
+		if errors.Is(s.err, ErrEncoderClosed) {
+			return nil
+		}
 		return err
 	}
 	if s.frameContentSize > 0 {
@@ -459,6 +474,11 @@ func (e *Encoder) Close() error {
 		}
 		_, s.err = s.w.Write(frame)
 	}
+	if s.err == nil {
+		s.err = ErrEncoderClosed
+		return nil
+	}
+
 	return s.err
 }
 
@@ -469,6 +489,15 @@ func (e *Encoder) Close() error {
 // Data compressed with EncodeAll can be decoded with the Decoder,
 // using either a stream or DecodeAll.
 func (e *Encoder) EncodeAll(src, dst []byte) []byte {
+	e.init.Do(e.initialize)
+	enc := <-e.encoders
+	defer func() {
+		e.encoders <- enc
+	}()
+	return e.encodeAll(enc, src, dst)
+}
+
+func (e *Encoder) encodeAll(enc encoder, src, dst []byte) []byte {
 	if len(src) == 0 {
 		if e.o.fullZero {
 			// Add frame header.
@@ -491,13 +520,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
 		}
 		return dst
 	}
-	e.init.Do(e.initialize)
-	enc := <-e.encoders
-	defer func() {
-		// Release encoder reference to last block.
-		// If a non-single block is needed the encoder will reset again.
-		e.encoders <- enc
-	}()
+
 	// Use single segments when above minimum window and below window size.
 	single := len(src) <= e.o.windowSize && len(src) > MinWindowSize
 	if e.o.single != nil {
diff --git a/vendor/github.com/klauspost/compress/zstd/framedec.go b/vendor/github.com/klauspost/compress/zstd/framedec.go
index 53e160f7..e47af66e 100644
--- a/vendor/github.com/klauspost/compress/zstd/framedec.go
+++ b/vendor/github.com/klauspost/compress/zstd/framedec.go
@@ -146,7 +146,9 @@ func (d *frameDec) reset(br byteBuffer) error {
 			}
 			return err
 		}
-		printf("raw: %x, mantissa: %d, exponent: %d\n", wd, wd&7, wd>>3)
+		if debugDecoder {
+			printf("raw: %x, mantissa: %d, exponent: %d\n", wd, wd&7, wd>>3)
+		}
 		windowLog := 10 + (wd >> 3)
 		windowBase := uint64(1) << windowLog
 		windowAdd := (windowBase / 8) * uint64(wd&0x7)
diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go
index 8adabd82..c59f17e0 100644
--- a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go
+++ b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go
@@ -146,7 +146,7 @@ func (s *sequenceDecs) decodeSyncSimple(hist []byte) (bool, error) {
 		return true, fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
 
 	default:
-		return true, fmt.Errorf("sequenceDecs_decode returned erronous code %d", errCode)
+		return true, fmt.Errorf("sequenceDecs_decode returned erroneous code %d", errCode)
 	}
 
 	s.seqSize += ctx.litRemain
@@ -292,7 +292,7 @@ func (s *sequenceDecs) decode(seqs []seqVals) error {
 			return io.ErrUnexpectedEOF
 		}
 
-		return fmt.Errorf("sequenceDecs_decode_amd64 returned erronous code %d", errCode)
+		return fmt.Errorf("sequenceDecs_decode_amd64 returned erroneous code %d", errCode)
 	}
 
 	if ctx.litRemain < 0 {
diff --git a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s
index 5b06174b..f5591fa1 100644
--- a/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s
+++ b/vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s
@@ -1814,7 +1814,7 @@ TEXT ·sequenceDecs_decodeSync_amd64(SB), $64-32
 	MOVQ    40(SP), AX
 	ADDQ    AX, 48(SP)
 
-	// Calculate poiter to s.out[cap(s.out)] (a past-end pointer)
+	// Calculate pointer to s.out[cap(s.out)] (a past-end pointer)
 	ADDQ R10, 32(SP)
 
 	// outBase += outPosition
@@ -2376,7 +2376,7 @@ TEXT ·sequenceDecs_decodeSync_bmi2(SB), $64-32
 	MOVQ    40(SP), CX
 	ADDQ    CX, 48(SP)
 
-	// Calculate poiter to s.out[cap(s.out)] (a past-end pointer)
+	// Calculate pointer to s.out[cap(s.out)] (a past-end pointer)
 	ADDQ R9, 32(SP)
 
 	// outBase += outPosition
@@ -2896,7 +2896,7 @@ TEXT ·sequenceDecs_decodeSync_safe_amd64(SB), $64-32
 	MOVQ    40(SP), AX
 	ADDQ    AX, 48(SP)
 
-	// Calculate poiter to s.out[cap(s.out)] (a past-end pointer)
+	// Calculate pointer to s.out[cap(s.out)] (a past-end pointer)
 	ADDQ R10, 32(SP)
 
 	// outBase += outPosition
@@ -3560,7 +3560,7 @@ TEXT ·sequenceDecs_decodeSync_safe_bmi2(SB), $64-32
 	MOVQ    40(SP), CX
 	ADDQ    CX, 48(SP)
 
-	// Calculate poiter to s.out[cap(s.out)] (a past-end pointer)
+	// Calculate pointer to s.out[cap(s.out)] (a past-end pointer)
 	ADDQ R9, 32(SP)
 
 	// outBase += outPosition
diff --git a/vendor/github.com/klauspost/compress/zstd/zstd.go b/vendor/github.com/klauspost/compress/zstd/zstd.go
index 4be7cc73..066bef2a 100644
--- a/vendor/github.com/klauspost/compress/zstd/zstd.go
+++ b/vendor/github.com/klauspost/compress/zstd/zstd.go
@@ -88,6 +88,10 @@ var (
 	// Close has been called.
 	ErrDecoderClosed = errors.New("decoder used after Close")
 
+	// ErrEncoderClosed will be returned if the Encoder was used after
+	// Close has been called.
+	ErrEncoderClosed = errors.New("encoder used after Close")
+
 	// ErrDecoderNilInput is returned when a nil Reader was provided
 	// and an operation other than Reset/DecodeAll/Close was attempted.
 	ErrDecoderNilInput = errors.New("nil input provided as reader")
diff --git a/vendor/github.com/klauspost/cpuid/v2/README.md b/vendor/github.com/klauspost/cpuid/v2/README.md
index 30f8d296..21508edb 100644
--- a/vendor/github.com/klauspost/cpuid/v2/README.md
+++ b/vendor/github.com/klauspost/cpuid/v2/README.md
@@ -310,6 +310,7 @@ Exit Code 1
 | AVXSLOW            | Indicates the CPU performs 2 128 bit operations instead of one                                                                                                                     |
 | AVXVNNI            | AVX (VEX encoded) VNNI neural network instructions                                                                                                                                 |
 | AVXVNNIINT8        | AVX-VNNI-INT8 instructions                                                                                                                                                         |
+| AVXVNNIINT16       | AVX-VNNI-INT16 instructions                                                                                                                                                        |
 | BHI_CTRL           | Branch History Injection and Intra-mode Branch Target Injection / CVE-2022-0001, CVE-2022-0002 / INTEL-SA-00598                                                                    |
 | BMI1               | Bit Manipulation Instruction Set 1                                                                                                                                                 |
 | BMI2               | Bit Manipulation Instruction Set 2                                                                                                                                                 |
diff --git a/vendor/github.com/klauspost/cpuid/v2/cpuid.go b/vendor/github.com/klauspost/cpuid/v2/cpuid.go
index 15b76033..53bc18ca 100644
--- a/vendor/github.com/klauspost/cpuid/v2/cpuid.go
+++ b/vendor/github.com/klauspost/cpuid/v2/cpuid.go
@@ -67,195 +67,201 @@ const (
 	// Keep index -1 as unknown
 	UNKNOWN = -1
 
-	// Add features
-	ADX                FeatureID = iota // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
-	AESNI                               // Advanced Encryption Standard New Instructions
-	AMD3DNOW                            // AMD 3DNOW
-	AMD3DNOWEXT                         // AMD 3DNowExt
-	AMXBF16                             // Tile computational operations on BFLOAT16 numbers
-	AMXFP16                             // Tile computational operations on FP16 numbers
-	AMXINT8                             // Tile computational operations on 8-bit integers
-	AMXTILE                             // Tile architecture
-	APX_F                               // Intel APX
-	AVX                                 // AVX functions
-	AVX10                               // If set the Intel AVX10 Converged Vector ISA is supported
-	AVX10_128                           // If set indicates that AVX10 128-bit vector support is present
-	AVX10_256                           // If set indicates that AVX10 256-bit vector support is present
-	AVX10_512                           // If set indicates that AVX10 512-bit vector support is present
-	AVX2                                // AVX2 functions
-	AVX512BF16                          // AVX-512 BFLOAT16 Instructions
-	AVX512BITALG                        // AVX-512 Bit Algorithms
-	AVX512BW                            // AVX-512 Byte and Word Instructions
-	AVX512CD                            // AVX-512 Conflict Detection Instructions
-	AVX512DQ                            // AVX-512 Doubleword and Quadword Instructions
-	AVX512ER                            // AVX-512 Exponential and Reciprocal Instructions
-	AVX512F                             // AVX-512 Foundation
-	AVX512FP16                          // AVX-512 FP16 Instructions
-	AVX512IFMA                          // AVX-512 Integer Fused Multiply-Add Instructions
-	AVX512PF                            // AVX-512 Prefetch Instructions
-	AVX512VBMI                          // AVX-512 Vector Bit Manipulation Instructions
-	AVX512VBMI2                         // AVX-512 Vector Bit Manipulation Instructions, Version 2
-	AVX512VL                            // AVX-512 Vector Length Extensions
-	AVX512VNNI                          // AVX-512 Vector Neural Network Instructions
-	AVX512VP2INTERSECT                  // AVX-512 Intersect for D/Q
-	AVX512VPOPCNTDQ                     // AVX-512 Vector Population Count Doubleword and Quadword
-	AVXIFMA                             // AVX-IFMA instructions
-	AVXNECONVERT                        // AVX-NE-CONVERT instructions
-	AVXSLOW                             // Indicates the CPU performs 2 128 bit operations instead of one
-	AVXVNNI                             // AVX (VEX encoded) VNNI neural network instructions
-	AVXVNNIINT8                         // AVX-VNNI-INT8 instructions
-	BHI_CTRL                            // Branch History Injection and Intra-mode Branch Target Injection / CVE-2022-0001, CVE-2022-0002 / INTEL-SA-00598
-	BMI1                                // Bit Manipulation Instruction Set 1
-	BMI2                                // Bit Manipulation Instruction Set 2
-	CETIBT                              // Intel CET Indirect Branch Tracking
-	CETSS                               // Intel CET Shadow Stack
-	CLDEMOTE                            // Cache Line Demote
-	CLMUL                               // Carry-less Multiplication
-	CLZERO                              // CLZERO instruction supported
-	CMOV                                // i686 CMOV
-	CMPCCXADD                           // CMPCCXADD instructions
-	CMPSB_SCADBS_SHORT                  // Fast short CMPSB and SCASB
-	CMPXCHG8                            // CMPXCHG8 instruction
-	CPBOOST                             // Core Performance Boost
-	CPPC                                // AMD: Collaborative Processor Performance Control
-	CX16                                // CMPXCHG16B Instruction
-	EFER_LMSLE_UNS                      // AMD: =Core::X86::Msr::EFER[LMSLE] is not supported, and MBZ
-	ENQCMD                              // Enqueue Command
-	ERMS                                // Enhanced REP MOVSB/STOSB
-	F16C                                // Half-precision floating-point conversion
-	FLUSH_L1D                           // Flush L1D cache
-	FMA3                                // Intel FMA 3. Does not imply AVX.
-	FMA4                                // Bulldozer FMA4 functions
-	FP128                               // AMD: When set, the internal FP/SIMD execution datapath is no more than 128-bits wide
-	FP256                               // AMD: When set, the internal FP/SIMD execution datapath is no more than 256-bits wide
-	FSRM                                // Fast Short Rep Mov
-	FXSR                                // FXSAVE, FXRESTOR instructions, CR4 bit 9
-	FXSROPT                             // FXSAVE/FXRSTOR optimizations
-	GFNI                                // Galois Field New Instructions. May require other features (AVX, AVX512VL,AVX512F) based on usage.
-	HLE                                 // Hardware Lock Elision
-	HRESET                              // If set CPU supports history reset and the IA32_HRESET_ENABLE MSR
-	HTT                                 // Hyperthreading (enabled)
-	HWA                                 // Hardware assert supported. Indicates support for MSRC001_10
-	HYBRID_CPU                          // This part has CPUs of more than one type.
-	HYPERVISOR                          // This bit has been reserved by Intel & AMD for use by hypervisors
-	IA32_ARCH_CAP                       // IA32_ARCH_CAPABILITIES MSR (Intel)
-	IA32_CORE_CAP                       // IA32_CORE_CAPABILITIES MSR
-	IBPB                                // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB)
-	IBRS                                // AMD: Indirect Branch Restricted Speculation
-	IBRS_PREFERRED                      // AMD: IBRS is preferred over software solution
-	IBRS_PROVIDES_SMP                   // AMD: IBRS provides Same Mode Protection
-	IBS                                 // Instruction Based Sampling (AMD)
-	IBSBRNTRGT                          // Instruction Based Sampling Feature (AMD)
-	IBSFETCHSAM                         // Instruction Based Sampling Feature (AMD)
-	IBSFFV                              // Instruction Based Sampling Feature (AMD)
-	IBSOPCNT                            // Instruction Based Sampling Feature (AMD)
-	IBSOPCNTEXT                         // Instruction Based Sampling Feature (AMD)
-	IBSOPSAM                            // Instruction Based Sampling Feature (AMD)
-	IBSRDWROPCNT                        // Instruction Based Sampling Feature (AMD)
-	IBSRIPINVALIDCHK                    // Instruction Based Sampling Feature (AMD)
-	IBS_FETCH_CTLX                      // AMD: IBS fetch control extended MSR supported
-	IBS_OPDATA4                         // AMD: IBS op data 4 MSR supported
-	IBS_OPFUSE                          // AMD: Indicates support for IbsOpFuse
-	IBS_PREVENTHOST                     // Disallowing IBS use by the host supported
-	IBS_ZEN4                            // AMD: Fetch and Op IBS support IBS extensions added with Zen4
-	IDPRED_CTRL                         // IPRED_DIS
-	INT_WBINVD                          // WBINVD/WBNOINVD are interruptible.
-	INVLPGB                             // NVLPGB and TLBSYNC instruction supported
-	KEYLOCKER                           // Key locker
-	KEYLOCKERW                          // Key locker wide
-	LAHF                                // LAHF/SAHF in long mode
-	LAM                                 // If set, CPU supports Linear Address Masking
-	LBRVIRT                             // LBR virtualization
-	LZCNT                               // LZCNT instruction
-	MCAOVERFLOW                         // MCA overflow recovery support.
-	MCDT_NO                             // Processor do not exhibit MXCSR Configuration Dependent Timing behavior and do not need to mitigate it.
-	MCOMMIT                             // MCOMMIT instruction supported
-	MD_CLEAR                            // VERW clears CPU buffers
-	MMX                                 // standard MMX
-	MMXEXT                              // SSE integer functions or AMD MMX ext
-	MOVBE                               // MOVBE instruction (big-endian)
-	MOVDIR64B                           // Move 64 Bytes as Direct Store
-	MOVDIRI                             // Move Doubleword as Direct Store
-	MOVSB_ZL                            // Fast Zero-Length MOVSB
-	MOVU                                // AMD: MOVU SSE instructions are more efficient and should be preferred to SSE	MOVL/MOVH. MOVUPS is more efficient than MOVLPS/MOVHPS. MOVUPD is more efficient than MOVLPD/MOVHPD
-	MPX                                 // Intel MPX (Memory Protection Extensions)
-	MSRIRC                              // Instruction Retired Counter MSR available
-	MSRLIST                             // Read/Write List of Model Specific Registers
-	MSR_PAGEFLUSH                       // Page Flush MSR available
-	NRIPS                               // Indicates support for NRIP save on VMEXIT
-	NX                                  // NX (No-Execute) bit
-	OSXSAVE                             // XSAVE enabled by OS
-	PCONFIG                             // PCONFIG for Intel Multi-Key Total Memory Encryption
-	POPCNT                              // POPCNT instruction
-	PPIN                                // AMD: Protected Processor Inventory Number support. Indicates that Protected Processor Inventory Number (PPIN) capability can be enabled
-	PREFETCHI                           // PREFETCHIT0/1 instructions
-	PSFD                                // Predictive Store Forward Disable
-	RDPRU                               // RDPRU instruction supported
-	RDRAND                              // RDRAND instruction is available
-	RDSEED                              // RDSEED instruction is available
-	RDTSCP                              // RDTSCP Instruction
-	RRSBA_CTRL                          // Restricted RSB Alternate
-	RTM                                 // Restricted Transactional Memory
-	RTM_ALWAYS_ABORT                    // Indicates that the loaded microcode is forcing RTM abort.
-	SERIALIZE                           // Serialize Instruction Execution
-	SEV                                 // AMD Secure Encrypted Virtualization supported
-	SEV_64BIT                           // AMD SEV guest execution only allowed from a 64-bit host
-	SEV_ALTERNATIVE                     // AMD SEV Alternate Injection supported
-	SEV_DEBUGSWAP                       // Full debug state swap supported for SEV-ES guests
-	SEV_ES                              // AMD SEV Encrypted State supported
-	SEV_RESTRICTED                      // AMD SEV Restricted Injection supported
-	SEV_SNP                             // AMD SEV Secure Nested Paging supported
-	SGX                                 // Software Guard Extensions
-	SGXLC                               // Software Guard Extensions Launch Control
-	SHA                                 // Intel SHA Extensions
-	SME                                 // AMD Secure Memory Encryption supported
-	SME_COHERENT                        // AMD Hardware cache coherency across encryption domains enforced
-	SPEC_CTRL_SSBD                      // Speculative Store Bypass Disable
-	SRBDS_CTRL                          // SRBDS mitigation MSR available
-	SSE                                 // SSE functions
-	SSE2                                // P4 SSE functions
-	SSE3                                // Prescott SSE3 functions
-	SSE4                                // Penryn SSE4.1 functions
-	SSE42                               // Nehalem SSE4.2 functions
-	SSE4A                               // AMD Barcelona microarchitecture SSE4a instructions
-	SSSE3                               // Conroe SSSE3 functions
-	STIBP                               // Single Thread Indirect Branch Predictors
-	STIBP_ALWAYSON                      // AMD: Single Thread Indirect Branch Prediction Mode has Enhanced Performance and may be left Always On
-	STOSB_SHORT                         // Fast short STOSB
-	SUCCOR                              // Software uncorrectable error containment and recovery capability.
-	SVM                                 // AMD Secure Virtual Machine
-	SVMDA                               // Indicates support for the SVM decode assists.
-	SVMFBASID                           // SVM, Indicates that TLB flush events, including CR3 writes and CR4.PGE toggles, flush only the current ASID's TLB entries. Also indicates support for the extended VMCBTLB_Control
-	SVML                                // AMD SVM lock. Indicates support for SVM-Lock.
-	SVMNP                               // AMD SVM nested paging
-	SVMPF                               // SVM pause intercept filter. Indicates support for the pause intercept filter
-	SVMPFT                              // SVM PAUSE filter threshold. Indicates support for the PAUSE filter cycle count threshold
-	SYSCALL                             // System-Call Extension (SCE): SYSCALL and SYSRET instructions.
-	SYSEE                               // SYSENTER and SYSEXIT instructions
-	TBM                                 // AMD Trailing Bit Manipulation
-	TDX_GUEST                           // Intel Trust Domain Extensions Guest
-	TLB_FLUSH_NESTED                    // AMD: Flushing includes all the nested translations for guest translations
-	TME                                 // Intel Total Memory Encryption. The following MSRs are supported: IA32_TME_CAPABILITY, IA32_TME_ACTIVATE, IA32_TME_EXCLUDE_MASK, and IA32_TME_EXCLUDE_BASE.
-	TOPEXT                              // TopologyExtensions: topology extensions support. Indicates support for CPUID Fn8000_001D_EAX_x[N:0]-CPUID Fn8000_001E_EDX.
-	TSCRATEMSR                          // MSR based TSC rate control. Indicates support for MSR TSC ratio MSRC000_0104
-	TSXLDTRK                            // Intel TSX Suspend Load Address Tracking
-	VAES                                // Vector AES. AVX(512) versions requires additional checks.
-	VMCBCLEAN                           // VMCB clean bits. Indicates support for VMCB clean bits.
-	VMPL                                // AMD VM Permission Levels supported
-	VMSA_REGPROT                        // AMD VMSA Register Protection supported
-	VMX                                 // Virtual Machine Extensions
-	VPCLMULQDQ                          // Carry-Less Multiplication Quadword. Requires AVX for 3 register versions.
-	VTE                                 // AMD Virtual Transparent Encryption supported
-	WAITPKG                             // TPAUSE, UMONITOR, UMWAIT
-	WBNOINVD                            // Write Back and Do Not Invalidate Cache
-	WRMSRNS                             // Non-Serializing Write to Model Specific Register
-	X87                                 // FPU
-	XGETBV1                             // Supports XGETBV with ECX = 1
-	XOP                                 // Bulldozer XOP functions
-	XSAVE                               // XSAVE, XRESTOR, XSETBV, XGETBV
-	XSAVEC                              // Supports XSAVEC and the compacted form of XRSTOR.
-	XSAVEOPT                            // XSAVEOPT available
-	XSAVES                              // Supports XSAVES/XRSTORS and IA32_XSS
+	// x86 features
+	ADX                 FeatureID = iota // Intel ADX (Multi-Precision Add-Carry Instruction Extensions)
+	AESNI                                // Advanced Encryption Standard New Instructions
+	AMD3DNOW                             // AMD 3DNOW
+	AMD3DNOWEXT                          // AMD 3DNowExt
+	AMXBF16                              // Tile computational operations on BFLOAT16 numbers
+	AMXFP16                              // Tile computational operations on FP16 numbers
+	AMXINT8                              // Tile computational operations on 8-bit integers
+	AMXTILE                              // Tile architecture
+	APX_F                                // Intel APX
+	AVX                                  // AVX functions
+	AVX10                                // If set the Intel AVX10 Converged Vector ISA is supported
+	AVX10_128                            // If set indicates that AVX10 128-bit vector support is present
+	AVX10_256                            // If set indicates that AVX10 256-bit vector support is present
+	AVX10_512                            // If set indicates that AVX10 512-bit vector support is present
+	AVX2                                 // AVX2 functions
+	AVX512BF16                           // AVX-512 BFLOAT16 Instructions
+	AVX512BITALG                         // AVX-512 Bit Algorithms
+	AVX512BW                             // AVX-512 Byte and Word Instructions
+	AVX512CD                             // AVX-512 Conflict Detection Instructions
+	AVX512DQ                             // AVX-512 Doubleword and Quadword Instructions
+	AVX512ER                             // AVX-512 Exponential and Reciprocal Instructions
+	AVX512F                              // AVX-512 Foundation
+	AVX512FP16                           // AVX-512 FP16 Instructions
+	AVX512IFMA                           // AVX-512 Integer Fused Multiply-Add Instructions
+	AVX512PF                             // AVX-512 Prefetch Instructions
+	AVX512VBMI                           // AVX-512 Vector Bit Manipulation Instructions
+	AVX512VBMI2                          // AVX-512 Vector Bit Manipulation Instructions, Version 2
+	AVX512VL                             // AVX-512 Vector Length Extensions
+	AVX512VNNI                           // AVX-512 Vector Neural Network Instructions
+	AVX512VP2INTERSECT                   // AVX-512 Intersect for D/Q
+	AVX512VPOPCNTDQ                      // AVX-512 Vector Population Count Doubleword and Quadword
+	AVXIFMA                              // AVX-IFMA instructions
+	AVXNECONVERT                         // AVX-NE-CONVERT instructions
+	AVXSLOW                              // Indicates the CPU performs 2 128 bit operations instead of one
+	AVXVNNI                              // AVX (VEX encoded) VNNI neural network instructions
+	AVXVNNIINT8                          // AVX-VNNI-INT8 instructions
+	AVXVNNIINT16                         // AVX-VNNI-INT16 instructions
+	BHI_CTRL                             // Branch History Injection and Intra-mode Branch Target Injection / CVE-2022-0001, CVE-2022-0002 / INTEL-SA-00598
+	BMI1                                 // Bit Manipulation Instruction Set 1
+	BMI2                                 // Bit Manipulation Instruction Set 2
+	CETIBT                               // Intel CET Indirect Branch Tracking
+	CETSS                                // Intel CET Shadow Stack
+	CLDEMOTE                             // Cache Line Demote
+	CLMUL                                // Carry-less Multiplication
+	CLZERO                               // CLZERO instruction supported
+	CMOV                                 // i686 CMOV
+	CMPCCXADD                            // CMPCCXADD instructions
+	CMPSB_SCADBS_SHORT                   // Fast short CMPSB and SCASB
+	CMPXCHG8                             // CMPXCHG8 instruction
+	CPBOOST                              // Core Performance Boost
+	CPPC                                 // AMD: Collaborative Processor Performance Control
+	CX16                                 // CMPXCHG16B Instruction
+	EFER_LMSLE_UNS                       // AMD: =Core::X86::Msr::EFER[LMSLE] is not supported, and MBZ
+	ENQCMD                               // Enqueue Command
+	ERMS                                 // Enhanced REP MOVSB/STOSB
+	F16C                                 // Half-precision floating-point conversion
+	FLUSH_L1D                            // Flush L1D cache
+	FMA3                                 // Intel FMA 3. Does not imply AVX.
+	FMA4                                 // Bulldozer FMA4 functions
+	FP128                                // AMD: When set, the internal FP/SIMD execution datapath is no more than 128-bits wide
+	FP256                                // AMD: When set, the internal FP/SIMD execution datapath is no more than 256-bits wide
+	FSRM                                 // Fast Short Rep Mov
+	FXSR                                 // FXSAVE, FXRESTOR instructions, CR4 bit 9
+	FXSROPT                              // FXSAVE/FXRSTOR optimizations
+	GFNI                                 // Galois Field New Instructions. May require other features (AVX, AVX512VL,AVX512F) based on usage.
+	HLE                                  // Hardware Lock Elision
+	HRESET                               // If set CPU supports history reset and the IA32_HRESET_ENABLE MSR
+	HTT                                  // Hyperthreading (enabled)
+	HWA                                  // Hardware assert supported. Indicates support for MSRC001_10
+	HYBRID_CPU                           // This part has CPUs of more than one type.
+	HYPERVISOR                           // This bit has been reserved by Intel & AMD for use by hypervisors
+	IA32_ARCH_CAP                        // IA32_ARCH_CAPABILITIES MSR (Intel)
+	IA32_CORE_CAP                        // IA32_CORE_CAPABILITIES MSR
+	IBPB                                 // Indirect Branch Restricted Speculation (IBRS) and Indirect Branch Predictor Barrier (IBPB)
+	IBPB_BRTYPE                          // Indicates that MSR 49h (PRED_CMD) bit 0 (IBPB) flushes all branch type predictions from the CPU branch predictor
+	IBRS                                 // AMD: Indirect Branch Restricted Speculation
+	IBRS_PREFERRED                       // AMD: IBRS is preferred over software solution
+	IBRS_PROVIDES_SMP                    // AMD: IBRS provides Same Mode Protection
+	IBS                                  // Instruction Based Sampling (AMD)
+	IBSBRNTRGT                           // Instruction Based Sampling Feature (AMD)
+	IBSFETCHSAM                          // Instruction Based Sampling Feature (AMD)
+	IBSFFV                               // Instruction Based Sampling Feature (AMD)
+	IBSOPCNT                             // Instruction Based Sampling Feature (AMD)
+	IBSOPCNTEXT                          // Instruction Based Sampling Feature (AMD)
+	IBSOPSAM                             // Instruction Based Sampling Feature (AMD)
+	IBSRDWROPCNT                         // Instruction Based Sampling Feature (AMD)
+	IBSRIPINVALIDCHK                     // Instruction Based Sampling Feature (AMD)
+	IBS_FETCH_CTLX                       // AMD: IBS fetch control extended MSR supported
+	IBS_OPDATA4                          // AMD: IBS op data 4 MSR supported
+	IBS_OPFUSE                           // AMD: Indicates support for IbsOpFuse
+	IBS_PREVENTHOST                      // Disallowing IBS use by the host supported
+	IBS_ZEN4                             // AMD: Fetch and Op IBS support IBS extensions added with Zen4
+	IDPRED_CTRL                          // IPRED_DIS
+	INT_WBINVD                           // WBINVD/WBNOINVD are interruptible.
+	INVLPGB                              // INVLPGB and TLBSYNC instructions supported
+	KEYLOCKER                            // Key locker
+	KEYLOCKERW                           // Key locker wide
+	LAHF                                 // LAHF/SAHF in long mode
+	LAM                                  // If set, CPU supports Linear Address Masking
+	LBRVIRT                              // LBR virtualization
+	LZCNT                                // LZCNT instruction
+	MCAOVERFLOW                          // MCA overflow recovery support.
+	MCDT_NO                              // Processor do not exhibit MXCSR Configuration Dependent Timing behavior and do not need to mitigate it.
+	MCOMMIT                              // MCOMMIT instruction supported
+	MD_CLEAR                             // VERW clears CPU buffers
+	MMX                                  // standard MMX
+	MMXEXT                               // SSE integer functions or AMD MMX ext
+	MOVBE                                // MOVBE instruction (big-endian)
+	MOVDIR64B                            // Move 64 Bytes as Direct Store
+	MOVDIRI                              // Move Doubleword as Direct Store
+	MOVSB_ZL                             // Fast Zero-Length MOVSB
+	MOVU                                 // AMD: MOVU SSE instructions are more efficient and should be preferred to SSE	MOVL/MOVH. MOVUPS is more efficient than MOVLPS/MOVHPS. MOVUPD is more efficient than MOVLPD/MOVHPD
+	MPX                                  // Intel MPX (Memory Protection Extensions)
+	MSRIRC                               // Instruction Retired Counter MSR available
+	MSRLIST                              // Read/Write List of Model Specific Registers
+	MSR_PAGEFLUSH                        // Page Flush MSR available
+	NRIPS                                // Indicates support for NRIP save on VMEXIT
+	NX                                   // NX (No-Execute) bit
+	OSXSAVE                              // XSAVE enabled by OS
+	PCONFIG                              // PCONFIG for Intel Multi-Key Total Memory Encryption
+	POPCNT                               // POPCNT instruction
+	PPIN                                 // AMD: Protected Processor Inventory Number support. Indicates that Protected Processor Inventory Number (PPIN) capability can be enabled
+	PREFETCHI                            // PREFETCHIT0/1 instructions
+	PSFD                                 // Predictive Store Forward Disable
+	RDPRU                                // RDPRU instruction supported
+	RDRAND                               // RDRAND instruction is available
+	RDSEED                               // RDSEED instruction is available
+	RDTSCP                               // RDTSCP Instruction
+	RRSBA_CTRL                           // Restricted RSB Alternate
+	RTM                                  // Restricted Transactional Memory
+	RTM_ALWAYS_ABORT                     // Indicates that the loaded microcode is forcing RTM abort.
+	SBPB                                 // Indicates support for the Selective Branch Predictor Barrier
+	SERIALIZE                            // Serialize Instruction Execution
+	SEV                                  // AMD Secure Encrypted Virtualization supported
+	SEV_64BIT                            // AMD SEV guest execution only allowed from a 64-bit host
+	SEV_ALTERNATIVE                      // AMD SEV Alternate Injection supported
+	SEV_DEBUGSWAP                        // Full debug state swap supported for SEV-ES guests
+	SEV_ES                               // AMD SEV Encrypted State supported
+	SEV_RESTRICTED                       // AMD SEV Restricted Injection supported
+	SEV_SNP                              // AMD SEV Secure Nested Paging supported
+	SGX                                  // Software Guard Extensions
+	SGXLC                                // Software Guard Extensions Launch Control
+	SHA                                  // Intel SHA Extensions
+	SME                                  // AMD Secure Memory Encryption supported
+	SME_COHERENT                         // AMD Hardware cache coherency across encryption domains enforced
+	SPEC_CTRL_SSBD                       // Speculative Store Bypass Disable
+	SRBDS_CTRL                           // SRBDS mitigation MSR available
+	SRSO_MSR_FIX                         // Indicates that software may use MSR BP_CFG[BpSpecReduce] to mitigate SRSO.
+	SRSO_NO                              // Indicates the CPU is not subject to the SRSO vulnerability
+	SRSO_USER_KERNEL_NO                  // Indicates the CPU is not subject to the SRSO vulnerability across user/kernel boundaries
+	SSE                                  // SSE functions
+	SSE2                                 // P4 SSE functions
+	SSE3                                 // Prescott SSE3 functions
+	SSE4                                 // Penryn SSE4.1 functions
+	SSE42                                // Nehalem SSE4.2 functions
+	SSE4A                                // AMD Barcelona microarchitecture SSE4a instructions
+	SSSE3                                // Conroe SSSE3 functions
+	STIBP                                // Single Thread Indirect Branch Predictors
+	STIBP_ALWAYSON                       // AMD: Single Thread Indirect Branch Prediction Mode has Enhanced Performance and may be left Always On
+	STOSB_SHORT                          // Fast short STOSB
+	SUCCOR                               // Software uncorrectable error containment and recovery capability.
+	SVM                                  // AMD Secure Virtual Machine
+	SVMDA                                // Indicates support for the SVM decode assists.
+	SVMFBASID                            // SVM, Indicates that TLB flush events, including CR3 writes and CR4.PGE toggles, flush only the current ASID's TLB entries. Also indicates support for the extended VMCBTLB_Control
+	SVML                                 // AMD SVM lock. Indicates support for SVM-Lock.
+	SVMNP                                // AMD SVM nested paging
+	SVMPF                                // SVM pause intercept filter. Indicates support for the pause intercept filter
+	SVMPFT                               // SVM PAUSE filter threshold. Indicates support for the PAUSE filter cycle count threshold
+	SYSCALL                              // System-Call Extension (SCE): SYSCALL and SYSRET instructions.
+	SYSEE                                // SYSENTER and SYSEXIT instructions
+	TBM                                  // AMD Trailing Bit Manipulation
+	TDX_GUEST                            // Intel Trust Domain Extensions Guest
+	TLB_FLUSH_NESTED                     // AMD: Flushing includes all the nested translations for guest translations
+	TME                                  // Intel Total Memory Encryption. The following MSRs are supported: IA32_TME_CAPABILITY, IA32_TME_ACTIVATE, IA32_TME_EXCLUDE_MASK, and IA32_TME_EXCLUDE_BASE.
+	TOPEXT                               // TopologyExtensions: topology extensions support. Indicates support for CPUID Fn8000_001D_EAX_x[N:0]-CPUID Fn8000_001E_EDX.
+	TSCRATEMSR                           // MSR based TSC rate control. Indicates support for MSR TSC ratio MSRC000_0104
+	TSXLDTRK                             // Intel TSX Suspend Load Address Tracking
+	VAES                                 // Vector AES. AVX(512) versions requires additional checks.
+	VMCBCLEAN                            // VMCB clean bits. Indicates support for VMCB clean bits.
+	VMPL                                 // AMD VM Permission Levels supported
+	VMSA_REGPROT                         // AMD VMSA Register Protection supported
+	VMX                                  // Virtual Machine Extensions
+	VPCLMULQDQ                           // Carry-Less Multiplication Quadword. Requires AVX for 3 register versions.
+	VTE                                  // AMD Virtual Transparent Encryption supported
+	WAITPKG                              // TPAUSE, UMONITOR, UMWAIT
+	WBNOINVD                             // Write Back and Do Not Invalidate Cache
+	WRMSRNS                              // Non-Serializing Write to Model Specific Register
+	X87                                  // FPU
+	XGETBV1                              // Supports XGETBV with ECX = 1
+	XOP                                  // Bulldozer XOP functions
+	XSAVE                                // XSAVE, XRESTOR, XSETBV, XGETBV
+	XSAVEC                               // Supports XSAVEC and the compacted form of XRSTOR.
+	XSAVEOPT                             // XSAVEOPT available
+	XSAVES                               // Supports XSAVES/XRSTORS and IA32_XSS
 
 	// ARM features:
 	AESARM   // AES instructions
@@ -309,10 +315,11 @@ type CPUInfo struct {
 		L2  int // L2 Cache (per core or shared). Will be -1 if undetected
 		L3  int // L3 Cache (per core, per ccx or shared). Will be -1 if undetected
 	}
-	SGX        SGXSupport
-	AVX10Level uint8
-	maxFunc    uint32
-	maxExFunc  uint32
+	SGX              SGXSupport
+	AMDMemEncryption AMDMemEncryptionSupport
+	AVX10Level       uint8
+	maxFunc          uint32
+	maxExFunc        uint32
 }
 
 var cpuid func(op uint32) (eax, ebx, ecx, edx uint32)
@@ -1079,6 +1086,32 @@ func hasSGX(available, lc bool) (rval SGXSupport) {
 	return
 }
 
+type AMDMemEncryptionSupport struct { // AMD memory-encryption values decoded from cpuidex(0x8000001f, 0); register names below assume an (eax, ebx, ecx, edx) result order - TODO confirm
+	Available          bool   // Copied from the caller's flag (addInfo passes SME || SEV); when false the other fields stay zero
+	CBitPossition      uint32 // EBX bits 5:0 (identifier spelling kept as-is: it is an exported name)
+	NumVMPL            uint32 // EBX bits 15:12
+	PhysAddrReduction  uint32 // EBX bits 11:6
+	NumEntryptedGuests uint32 // Full ECX value (identifier spelling kept as-is: it is an exported name)
+	MinSevNoEsAsid     uint32 // Full EDX value
+}
+
+func hasAMDMemEncryption(available bool) (rval AMDMemEncryptionSupport) { // Fills rval from extended CPUID leaf 0x8000001f, sub-leaf 0, when available is true
+	rval.Available = available
+	if !available {
+		return // Skip the CPUID query; every field other than Available keeps its zero value
+	}
+
+	_, b, c, d := cpuidex(0x8000001f, 0) // First result is discarded; presumably (eax, ebx, ecx, edx) like cpuid - TODO confirm
+
+	rval.CBitPossition = b & 0x3f            // b bits 5:0
+	rval.PhysAddrReduction = (b >> 6) & 0x3F // b bits 11:6
+	rval.NumVMPL = (b >> 12) & 0xf           // b bits 15:12
+	rval.NumEntryptedGuests = c              // c stored unmasked
+	rval.MinSevNoEsAsid = d                  // d stored unmasked
+
+	return
+}
+
 func support() flagSet {
 	var fs flagSet
 	mfi := maxFunctionID()
@@ -1210,6 +1243,7 @@ func support() flagSet {
 		// CPUID.(EAX=7, ECX=1).EDX
 		fs.setIf(edx1&(1<<4) != 0, AVXVNNIINT8)
 		fs.setIf(edx1&(1<<5) != 0, AVXNECONVERT)
+		fs.setIf(edx1&(1<<10) != 0, AVXVNNIINT16)
 		fs.setIf(edx1&(1<<14) != 0, PREFETCHI)
 		fs.setIf(edx1&(1<<19) != 0, AVX10)
 		fs.setIf(edx1&(1<<21) != 0, APX_F)
@@ -1418,6 +1452,15 @@ func support() flagSet {
 		fs.setIf((a>>24)&1 == 1, VMSA_REGPROT)
 	}
 
+	if maxExtendedFunction() >= 0x80000021 && vend == AMD {
+		a, _, _, _ := cpuid(0x80000021)
+		fs.setIf((a>>31)&1 == 1, SRSO_MSR_FIX)
+		fs.setIf((a>>30)&1 == 1, SRSO_USER_KERNEL_NO)
+		fs.setIf((a>>29)&1 == 1, SRSO_NO)
+		fs.setIf((a>>28)&1 == 1, IBPB_BRTYPE)
+		fs.setIf((a>>27)&1 == 1, SBPB)
+	}
+
 	if mfi >= 0x20 {
 		// Microsoft has decided to purposefully hide the information
 		// of the guest TEE when VMs are being created using Hyper-V.
diff --git a/vendor/github.com/klauspost/cpuid/v2/detect_x86.go b/vendor/github.com/klauspost/cpuid/v2/detect_x86.go
index c7dfa125..799b400c 100644
--- a/vendor/github.com/klauspost/cpuid/v2/detect_x86.go
+++ b/vendor/github.com/klauspost/cpuid/v2/detect_x86.go
@@ -27,6 +27,7 @@ func addInfo(c *CPUInfo, safe bool) {
 	c.Family, c.Model, c.Stepping = familyModel()
 	c.featureSet = support()
 	c.SGX = hasSGX(c.featureSet.inSet(SGX), c.featureSet.inSet(SGXLC))
+	c.AMDMemEncryption = hasAMDMemEncryption(c.featureSet.inSet(SME) || c.featureSet.inSet(SEV))
 	c.ThreadsPerCore = threadsPerCore()
 	c.LogicalCores = logicalCores()
 	c.PhysicalCores = physicalCores()
diff --git a/vendor/github.com/klauspost/cpuid/v2/featureid_string.go b/vendor/github.com/klauspost/cpuid/v2/featureid_string.go
index 43bd05f5..3a256031 100644
--- a/vendor/github.com/klauspost/cpuid/v2/featureid_string.go
+++ b/vendor/github.com/klauspost/cpuid/v2/featureid_string.go
@@ -44,189 +44,195 @@ func _() {
 	_ = x[AVXSLOW-34]
 	_ = x[AVXVNNI-35]
 	_ = x[AVXVNNIINT8-36]
-	_ = x[BHI_CTRL-37]
-	_ = x[BMI1-38]
-	_ = x[BMI2-39]
-	_ = x[CETIBT-40]
-	_ = x[CETSS-41]
-	_ = x[CLDEMOTE-42]
-	_ = x[CLMUL-43]
-	_ = x[CLZERO-44]
-	_ = x[CMOV-45]
-	_ = x[CMPCCXADD-46]
-	_ = x[CMPSB_SCADBS_SHORT-47]
-	_ = x[CMPXCHG8-48]
-	_ = x[CPBOOST-49]
-	_ = x[CPPC-50]
-	_ = x[CX16-51]
-	_ = x[EFER_LMSLE_UNS-52]
-	_ = x[ENQCMD-53]
-	_ = x[ERMS-54]
-	_ = x[F16C-55]
-	_ = x[FLUSH_L1D-56]
-	_ = x[FMA3-57]
-	_ = x[FMA4-58]
-	_ = x[FP128-59]
-	_ = x[FP256-60]
-	_ = x[FSRM-61]
-	_ = x[FXSR-62]
-	_ = x[FXSROPT-63]
-	_ = x[GFNI-64]
-	_ = x[HLE-65]
-	_ = x[HRESET-66]
-	_ = x[HTT-67]
-	_ = x[HWA-68]
-	_ = x[HYBRID_CPU-69]
-	_ = x[HYPERVISOR-70]
-	_ = x[IA32_ARCH_CAP-71]
-	_ = x[IA32_CORE_CAP-72]
-	_ = x[IBPB-73]
-	_ = x[IBRS-74]
-	_ = x[IBRS_PREFERRED-75]
-	_ = x[IBRS_PROVIDES_SMP-76]
-	_ = x[IBS-77]
-	_ = x[IBSBRNTRGT-78]
-	_ = x[IBSFETCHSAM-79]
-	_ = x[IBSFFV-80]
-	_ = x[IBSOPCNT-81]
-	_ = x[IBSOPCNTEXT-82]
-	_ = x[IBSOPSAM-83]
-	_ = x[IBSRDWROPCNT-84]
-	_ = x[IBSRIPINVALIDCHK-85]
-	_ = x[IBS_FETCH_CTLX-86]
-	_ = x[IBS_OPDATA4-87]
-	_ = x[IBS_OPFUSE-88]
-	_ = x[IBS_PREVENTHOST-89]
-	_ = x[IBS_ZEN4-90]
-	_ = x[IDPRED_CTRL-91]
-	_ = x[INT_WBINVD-92]
-	_ = x[INVLPGB-93]
-	_ = x[KEYLOCKER-94]
-	_ = x[KEYLOCKERW-95]
-	_ = x[LAHF-96]
-	_ = x[LAM-97]
-	_ = x[LBRVIRT-98]
-	_ = x[LZCNT-99]
-	_ = x[MCAOVERFLOW-100]
-	_ = x[MCDT_NO-101]
-	_ = x[MCOMMIT-102]
-	_ = x[MD_CLEAR-103]
-	_ = x[MMX-104]
-	_ = x[MMXEXT-105]
-	_ = x[MOVBE-106]
-	_ = x[MOVDIR64B-107]
-	_ = x[MOVDIRI-108]
-	_ = x[MOVSB_ZL-109]
-	_ = x[MOVU-110]
-	_ = x[MPX-111]
-	_ = x[MSRIRC-112]
-	_ = x[MSRLIST-113]
-	_ = x[MSR_PAGEFLUSH-114]
-	_ = x[NRIPS-115]
-	_ = x[NX-116]
-	_ = x[OSXSAVE-117]
-	_ = x[PCONFIG-118]
-	_ = x[POPCNT-119]
-	_ = x[PPIN-120]
-	_ = x[PREFETCHI-121]
-	_ = x[PSFD-122]
-	_ = x[RDPRU-123]
-	_ = x[RDRAND-124]
-	_ = x[RDSEED-125]
-	_ = x[RDTSCP-126]
-	_ = x[RRSBA_CTRL-127]
-	_ = x[RTM-128]
-	_ = x[RTM_ALWAYS_ABORT-129]
-	_ = x[SERIALIZE-130]
-	_ = x[SEV-131]
-	_ = x[SEV_64BIT-132]
-	_ = x[SEV_ALTERNATIVE-133]
-	_ = x[SEV_DEBUGSWAP-134]
-	_ = x[SEV_ES-135]
-	_ = x[SEV_RESTRICTED-136]
-	_ = x[SEV_SNP-137]
-	_ = x[SGX-138]
-	_ = x[SGXLC-139]
-	_ = x[SHA-140]
-	_ = x[SME-141]
-	_ = x[SME_COHERENT-142]
-	_ = x[SPEC_CTRL_SSBD-143]
-	_ = x[SRBDS_CTRL-144]
-	_ = x[SSE-145]
-	_ = x[SSE2-146]
-	_ = x[SSE3-147]
-	_ = x[SSE4-148]
-	_ = x[SSE42-149]
-	_ = x[SSE4A-150]
-	_ = x[SSSE3-151]
-	_ = x[STIBP-152]
-	_ = x[STIBP_ALWAYSON-153]
-	_ = x[STOSB_SHORT-154]
-	_ = x[SUCCOR-155]
-	_ = x[SVM-156]
-	_ = x[SVMDA-157]
-	_ = x[SVMFBASID-158]
-	_ = x[SVML-159]
-	_ = x[SVMNP-160]
-	_ = x[SVMPF-161]
-	_ = x[SVMPFT-162]
-	_ = x[SYSCALL-163]
-	_ = x[SYSEE-164]
-	_ = x[TBM-165]
-	_ = x[TDX_GUEST-166]
-	_ = x[TLB_FLUSH_NESTED-167]
-	_ = x[TME-168]
-	_ = x[TOPEXT-169]
-	_ = x[TSCRATEMSR-170]
-	_ = x[TSXLDTRK-171]
-	_ = x[VAES-172]
-	_ = x[VMCBCLEAN-173]
-	_ = x[VMPL-174]
-	_ = x[VMSA_REGPROT-175]
-	_ = x[VMX-176]
-	_ = x[VPCLMULQDQ-177]
-	_ = x[VTE-178]
-	_ = x[WAITPKG-179]
-	_ = x[WBNOINVD-180]
-	_ = x[WRMSRNS-181]
-	_ = x[X87-182]
-	_ = x[XGETBV1-183]
-	_ = x[XOP-184]
-	_ = x[XSAVE-185]
-	_ = x[XSAVEC-186]
-	_ = x[XSAVEOPT-187]
-	_ = x[XSAVES-188]
-	_ = x[AESARM-189]
-	_ = x[ARMCPUID-190]
-	_ = x[ASIMD-191]
-	_ = x[ASIMDDP-192]
-	_ = x[ASIMDHP-193]
-	_ = x[ASIMDRDM-194]
-	_ = x[ATOMICS-195]
-	_ = x[CRC32-196]
-	_ = x[DCPOP-197]
-	_ = x[EVTSTRM-198]
-	_ = x[FCMA-199]
-	_ = x[FP-200]
-	_ = x[FPHP-201]
-	_ = x[GPA-202]
-	_ = x[JSCVT-203]
-	_ = x[LRCPC-204]
-	_ = x[PMULL-205]
-	_ = x[SHA1-206]
-	_ = x[SHA2-207]
-	_ = x[SHA3-208]
-	_ = x[SHA512-209]
-	_ = x[SM3-210]
-	_ = x[SM4-211]
-	_ = x[SVE-212]
-	_ = x[lastID-213]
+	_ = x[AVXVNNIINT16-37]
+	_ = x[BHI_CTRL-38]
+	_ = x[BMI1-39]
+	_ = x[BMI2-40]
+	_ = x[CETIBT-41]
+	_ = x[CETSS-42]
+	_ = x[CLDEMOTE-43]
+	_ = x[CLMUL-44]
+	_ = x[CLZERO-45]
+	_ = x[CMOV-46]
+	_ = x[CMPCCXADD-47]
+	_ = x[CMPSB_SCADBS_SHORT-48]
+	_ = x[CMPXCHG8-49]
+	_ = x[CPBOOST-50]
+	_ = x[CPPC-51]
+	_ = x[CX16-52]
+	_ = x[EFER_LMSLE_UNS-53]
+	_ = x[ENQCMD-54]
+	_ = x[ERMS-55]
+	_ = x[F16C-56]
+	_ = x[FLUSH_L1D-57]
+	_ = x[FMA3-58]
+	_ = x[FMA4-59]
+	_ = x[FP128-60]
+	_ = x[FP256-61]
+	_ = x[FSRM-62]
+	_ = x[FXSR-63]
+	_ = x[FXSROPT-64]
+	_ = x[GFNI-65]
+	_ = x[HLE-66]
+	_ = x[HRESET-67]
+	_ = x[HTT-68]
+	_ = x[HWA-69]
+	_ = x[HYBRID_CPU-70]
+	_ = x[HYPERVISOR-71]
+	_ = x[IA32_ARCH_CAP-72]
+	_ = x[IA32_CORE_CAP-73]
+	_ = x[IBPB-74]
+	_ = x[IBPB_BRTYPE-75]
+	_ = x[IBRS-76]
+	_ = x[IBRS_PREFERRED-77]
+	_ = x[IBRS_PROVIDES_SMP-78]
+	_ = x[IBS-79]
+	_ = x[IBSBRNTRGT-80]
+	_ = x[IBSFETCHSAM-81]
+	_ = x[IBSFFV-82]
+	_ = x[IBSOPCNT-83]
+	_ = x[IBSOPCNTEXT-84]
+	_ = x[IBSOPSAM-85]
+	_ = x[IBSRDWROPCNT-86]
+	_ = x[IBSRIPINVALIDCHK-87]
+	_ = x[IBS_FETCH_CTLX-88]
+	_ = x[IBS_OPDATA4-89]
+	_ = x[IBS_OPFUSE-90]
+	_ = x[IBS_PREVENTHOST-91]
+	_ = x[IBS_ZEN4-92]
+	_ = x[IDPRED_CTRL-93]
+	_ = x[INT_WBINVD-94]
+	_ = x[INVLPGB-95]
+	_ = x[KEYLOCKER-96]
+	_ = x[KEYLOCKERW-97]
+	_ = x[LAHF-98]
+	_ = x[LAM-99]
+	_ = x[LBRVIRT-100]
+	_ = x[LZCNT-101]
+	_ = x[MCAOVERFLOW-102]
+	_ = x[MCDT_NO-103]
+	_ = x[MCOMMIT-104]
+	_ = x[MD_CLEAR-105]
+	_ = x[MMX-106]
+	_ = x[MMXEXT-107]
+	_ = x[MOVBE-108]
+	_ = x[MOVDIR64B-109]
+	_ = x[MOVDIRI-110]
+	_ = x[MOVSB_ZL-111]
+	_ = x[MOVU-112]
+	_ = x[MPX-113]
+	_ = x[MSRIRC-114]
+	_ = x[MSRLIST-115]
+	_ = x[MSR_PAGEFLUSH-116]
+	_ = x[NRIPS-117]
+	_ = x[NX-118]
+	_ = x[OSXSAVE-119]
+	_ = x[PCONFIG-120]
+	_ = x[POPCNT-121]
+	_ = x[PPIN-122]
+	_ = x[PREFETCHI-123]
+	_ = x[PSFD-124]
+	_ = x[RDPRU-125]
+	_ = x[RDRAND-126]
+	_ = x[RDSEED-127]
+	_ = x[RDTSCP-128]
+	_ = x[RRSBA_CTRL-129]
+	_ = x[RTM-130]
+	_ = x[RTM_ALWAYS_ABORT-131]
+	_ = x[SBPB-132]
+	_ = x[SERIALIZE-133]
+	_ = x[SEV-134]
+	_ = x[SEV_64BIT-135]
+	_ = x[SEV_ALTERNATIVE-136]
+	_ = x[SEV_DEBUGSWAP-137]
+	_ = x[SEV_ES-138]
+	_ = x[SEV_RESTRICTED-139]
+	_ = x[SEV_SNP-140]
+	_ = x[SGX-141]
+	_ = x[SGXLC-142]
+	_ = x[SHA-143]
+	_ = x[SME-144]
+	_ = x[SME_COHERENT-145]
+	_ = x[SPEC_CTRL_SSBD-146]
+	_ = x[SRBDS_CTRL-147]
+	_ = x[SRSO_MSR_FIX-148]
+	_ = x[SRSO_NO-149]
+	_ = x[SRSO_USER_KERNEL_NO-150]
+	_ = x[SSE-151]
+	_ = x[SSE2-152]
+	_ = x[SSE3-153]
+	_ = x[SSE4-154]
+	_ = x[SSE42-155]
+	_ = x[SSE4A-156]
+	_ = x[SSSE3-157]
+	_ = x[STIBP-158]
+	_ = x[STIBP_ALWAYSON-159]
+	_ = x[STOSB_SHORT-160]
+	_ = x[SUCCOR-161]
+	_ = x[SVM-162]
+	_ = x[SVMDA-163]
+	_ = x[SVMFBASID-164]
+	_ = x[SVML-165]
+	_ = x[SVMNP-166]
+	_ = x[SVMPF-167]
+	_ = x[SVMPFT-168]
+	_ = x[SYSCALL-169]
+	_ = x[SYSEE-170]
+	_ = x[TBM-171]
+	_ = x[TDX_GUEST-172]
+	_ = x[TLB_FLUSH_NESTED-173]
+	_ = x[TME-174]
+	_ = x[TOPEXT-175]
+	_ = x[TSCRATEMSR-176]
+	_ = x[TSXLDTRK-177]
+	_ = x[VAES-178]
+	_ = x[VMCBCLEAN-179]
+	_ = x[VMPL-180]
+	_ = x[VMSA_REGPROT-181]
+	_ = x[VMX-182]
+	_ = x[VPCLMULQDQ-183]
+	_ = x[VTE-184]
+	_ = x[WAITPKG-185]
+	_ = x[WBNOINVD-186]
+	_ = x[WRMSRNS-187]
+	_ = x[X87-188]
+	_ = x[XGETBV1-189]
+	_ = x[XOP-190]
+	_ = x[XSAVE-191]
+	_ = x[XSAVEC-192]
+	_ = x[XSAVEOPT-193]
+	_ = x[XSAVES-194]
+	_ = x[AESARM-195]
+	_ = x[ARMCPUID-196]
+	_ = x[ASIMD-197]
+	_ = x[ASIMDDP-198]
+	_ = x[ASIMDHP-199]
+	_ = x[ASIMDRDM-200]
+	_ = x[ATOMICS-201]
+	_ = x[CRC32-202]
+	_ = x[DCPOP-203]
+	_ = x[EVTSTRM-204]
+	_ = x[FCMA-205]
+	_ = x[FP-206]
+	_ = x[FPHP-207]
+	_ = x[GPA-208]
+	_ = x[JSCVT-209]
+	_ = x[LRCPC-210]
+	_ = x[PMULL-211]
+	_ = x[SHA1-212]
+	_ = x[SHA2-213]
+	_ = x[SHA3-214]
+	_ = x[SHA512-215]
+	_ = x[SM3-216]
+	_ = x[SM4-217]
+	_ = x[SVE-218]
+	_ = x[lastID-219]
 	_ = x[firstID-0]
 }
 
-const _FeatureID_name = "firstIDADXAESNIAMD3DNOWAMD3DNOWEXTAMXBF16AMXFP16AMXINT8AMXTILEAPX_FAVXAVX10AVX10_128AVX10_256AVX10_512AVX2AVX512BF16AVX512BITALGAVX512BWAVX512CDAVX512DQAVX512ERAVX512FAVX512FP16AVX512IFMAAVX512PFAVX512VBMIAVX512VBMI2AVX512VLAVX512VNNIAVX512VP2INTERSECTAVX512VPOPCNTDQAVXIFMAAVXNECONVERTAVXSLOWAVXVNNIAVXVNNIINT8BHI_CTRLBMI1BMI2CETIBTCETSSCLDEMOTECLMULCLZEROCMOVCMPCCXADDCMPSB_SCADBS_SHORTCMPXCHG8CPBOOSTCPPCCX16EFER_LMSLE_UNSENQCMDERMSF16CFLUSH_L1DFMA3FMA4FP128FP256FSRMFXSRFXSROPTGFNIHLEHRESETHTTHWAHYBRID_CPUHYPERVISORIA32_ARCH_CAPIA32_CORE_CAPIBPBIBRSIBRS_PREFERREDIBRS_PROVIDES_SMPIBSIBSBRNTRGTIBSFETCHSAMIBSFFVIBSOPCNTIBSOPCNTEXTIBSOPSAMIBSRDWROPCNTIBSRIPINVALIDCHKIBS_FETCH_CTLXIBS_OPDATA4IBS_OPFUSEIBS_PREVENTHOSTIBS_ZEN4IDPRED_CTRLINT_WBINVDINVLPGBKEYLOCKERKEYLOCKERWLAHFLAMLBRVIRTLZCNTMCAOVERFLOWMCDT_NOMCOMMITMD_CLEARMMXMMXEXTMOVBEMOVDIR64BMOVDIRIMOVSB_ZLMOVUMPXMSRIRCMSRLISTMSR_PAGEFLUSHNRIPSNXOSXSAVEPCONFIGPOPCNTPPINPREFETCHIPSFDRDPRURDRANDRDSEEDRDTSCPRRSBA_CTRLRTMRTM_ALWAYS_ABORTSERIALIZESEVSEV_64BITSEV_ALTERNATIVESEV_DEBUGSWAPSEV_ESSEV_RESTRICTEDSEV_SNPSGXSGXLCSHASMESME_COHERENTSPEC_CTRL_SSBDSRBDS_CTRLSSESSE2SSE3SSE4SSE42SSE4ASSSE3STIBPSTIBP_ALWAYSONSTOSB_SHORTSUCCORSVMSVMDASVMFBASIDSVMLSVMNPSVMPFSVMPFTSYSCALLSYSEETBMTDX_GUESTTLB_FLUSH_NESTEDTMETOPEXTTSCRATEMSRTSXLDTRKVAESVMCBCLEANVMPLVMSA_REGPROTVMXVPCLMULQDQVTEWAITPKGWBNOINVDWRMSRNSX87XGETBV1XOPXSAVEXSAVECXSAVEOPTXSAVESAESARMARMCPUIDASIMDASIMDDPASIMDHPASIMDRDMATOMICSCRC32DCPOPEVTSTRMFCMAFPFPHPGPAJSCVTLRCPCPMULLSHA1SHA2SHA3SHA512SM3SM4SVElastID"
+const _FeatureID_name = "firstIDADXAESNIAMD3DNOWAMD3DNOWEXTAMXBF16AMXFP16AMXINT8AMXTILEAPX_FAVXAVX10AVX10_128AVX10_256AVX10_512AVX2AVX512BF16AVX512BITALGAVX512BWAVX512CDAVX512DQAVX512ERAVX512FAVX512FP16AVX512IFMAAVX512PFAVX512VBMIAVX512VBMI2AVX512VLAVX512VNNIAVX512VP2INTERSECTAVX512VPOPCNTDQAVXIFMAAVXNECONVERTAVXSLOWAVXVNNIAVXVNNIINT8AVXVNNIINT16BHI_CTRLBMI1BMI2CETIBTCETSSCLDEMOTECLMULCLZEROCMOVCMPCCXADDCMPSB_SCADBS_SHORTCMPXCHG8CPBOOSTCPPCCX16EFER_LMSLE_UNSENQCMDERMSF16CFLUSH_L1DFMA3FMA4FP128FP256FSRMFXSRFXSROPTGFNIHLEHRESETHTTHWAHYBRID_CPUHYPERVISORIA32_ARCH_CAPIA32_CORE_CAPIBPBIBPB_BRTYPEIBRSIBRS_PREFERREDIBRS_PROVIDES_SMPIBSIBSBRNTRGTIBSFETCHSAMIBSFFVIBSOPCNTIBSOPCNTEXTIBSOPSAMIBSRDWROPCNTIBSRIPINVALIDCHKIBS_FETCH_CTLXIBS_OPDATA4IBS_OPFUSEIBS_PREVENTHOSTIBS_ZEN4IDPRED_CTRLINT_WBINVDINVLPGBKEYLOCKERKEYLOCKERWLAHFLAMLBRVIRTLZCNTMCAOVERFLOWMCDT_NOMCOMMITMD_CLEARMMXMMXEXTMOVBEMOVDIR64BMOVDIRIMOVSB_ZLMOVUMPXMSRIRCMSRLISTMSR_PAGEFLUSHNRIPSNXOSXSAVEPCONFIGPOPCNTPPINPREFETCHIPSFDRDPRURDRANDRDSEEDRDTSCPRRSBA_CTRLRTMRTM_ALWAYS_ABORTSBPBSERIALIZESEVSEV_64BITSEV_ALTERNATIVESEV_DEBUGSWAPSEV_ESSEV_RESTRICTEDSEV_SNPSGXSGXLCSHASMESME_COHERENTSPEC_CTRL_SSBDSRBDS_CTRLSRSO_MSR_FIXSRSO_NOSRSO_USER_KERNEL_NOSSESSE2SSE3SSE4SSE42SSE4ASSSE3STIBPSTIBP_ALWAYSONSTOSB_SHORTSUCCORSVMSVMDASVMFBASIDSVMLSVMNPSVMPFSVMPFTSYSCALLSYSEETBMTDX_GUESTTLB_FLUSH_NESTEDTMETOPEXTTSCRATEMSRTSXLDTRKVAESVMCBCLEANVMPLVMSA_REGPROTVMXVPCLMULQDQVTEWAITPKGWBNOINVDWRMSRNSX87XGETBV1XOPXSAVEXSAVECXSAVEOPTXSAVESAESARMARMCPUIDASIMDASIMDDPASIMDHPASIMDRDMATOMICSCRC32DCPOPEVTSTRMFCMAFPFPHPGPAJSCVTLRCPCPMULLSHA1SHA2SHA3SHA512SM3SM4SVElastID"
 
-var _FeatureID_index = [...]uint16{0, 7, 10, 15, 23, 34, 41, 48, 55, 62, 67, 70, 75, 84, 93, 102, 106, 116, 128, 136, 144, 152, 160, 167, 177, 187, 195, 205, 216, 224, 234, 252, 267, 274, 286, 293, 300, 311, 319, 323, 327, 333, 338, 346, 351, 357, 361, 370, 388, 396, 403, 407, 411, 425, 431, 435, 439, 448, 452, 456, 461, 466, 470, 474, 481, 485, 488, 494, 497, 500, 510, 520, 533, 546, 550, 554, 568, 585, 588, 598, 609, 615, 623, 634, 642, 654, 670, 684, 695, 705, 720, 728, 739, 749, 756, 765, 775, 779, 782, 789, 794, 805, 812, 819, 827, 830, 836, 841, 850, 857, 865, 869, 872, 878, 885, 898, 903, 905, 912, 919, 925, 929, 938, 942, 947, 953, 959, 965, 975, 978, 994, 1003, 1006, 1015, 1030, 1043, 1049, 1063, 1070, 1073, 1078, 1081, 1084, 1096, 1110, 1120, 1123, 1127, 1131, 1135, 1140, 1145, 1150, 1155, 1169, 1180, 1186, 1189, 1194, 1203, 1207, 1212, 1217, 1223, 1230, 1235, 1238, 1247, 1263, 1266, 1272, 1282, 1290, 1294, 1303, 1307, 1319, 1322, 1332, 1335, 1342, 1350, 1357, 1360, 1367, 1370, 1375, 1381, 1389, 1395, 1401, 1409, 1414, 1421, 1428, 1436, 1443, 1448, 1453, 1460, 1464, 1466, 1470, 1473, 1478, 1483, 1488, 1492, 1496, 1500, 1506, 1509, 1512, 1515, 1521}
+var _FeatureID_index = [...]uint16{0, 7, 10, 15, 23, 34, 41, 48, 55, 62, 67, 70, 75, 84, 93, 102, 106, 116, 128, 136, 144, 152, 160, 167, 177, 187, 195, 205, 216, 224, 234, 252, 267, 274, 286, 293, 300, 311, 323, 331, 335, 339, 345, 350, 358, 363, 369, 373, 382, 400, 408, 415, 419, 423, 437, 443, 447, 451, 460, 464, 468, 473, 478, 482, 486, 493, 497, 500, 506, 509, 512, 522, 532, 545, 558, 562, 573, 577, 591, 608, 611, 621, 632, 638, 646, 657, 665, 677, 693, 707, 718, 728, 743, 751, 762, 772, 779, 788, 798, 802, 805, 812, 817, 828, 835, 842, 850, 853, 859, 864, 873, 880, 888, 892, 895, 901, 908, 921, 926, 928, 935, 942, 948, 952, 961, 965, 970, 976, 982, 988, 998, 1001, 1017, 1021, 1030, 1033, 1042, 1057, 1070, 1076, 1090, 1097, 1100, 1105, 1108, 1111, 1123, 1137, 1147, 1159, 1166, 1185, 1188, 1192, 1196, 1200, 1205, 1210, 1215, 1220, 1234, 1245, 1251, 1254, 1259, 1268, 1272, 1277, 1282, 1288, 1295, 1300, 1303, 1312, 1328, 1331, 1337, 1347, 1355, 1359, 1368, 1372, 1384, 1387, 1397, 1400, 1407, 1415, 1422, 1425, 1432, 1435, 1440, 1446, 1454, 1460, 1466, 1474, 1479, 1486, 1493, 1501, 1508, 1513, 1518, 1525, 1529, 1531, 1535, 1538, 1543, 1548, 1553, 1557, 1561, 1565, 1571, 1574, 1577, 1580, 1586}
 
 func (i FeatureID) String() string {
 	if i < 0 || i >= FeatureID(len(_FeatureID_index)-1) {
diff --git a/vendor/github.com/minio/minio-go/v7/CREDITS b/vendor/github.com/minio/minio-go/v7/CREDITS
index dce3d4c9..154c9fd5 100644
--- a/vendor/github.com/minio/minio-go/v7/CREDITS
+++ b/vendor/github.com/minio/minio-go/v7/CREDITS
@@ -79,6 +79,33 @@ SOFTWARE.
 
 ================================================================
 
+github.com/goccy/go-json
+https://github.com/goccy/go-json
+----------------------------------------------------------------
+MIT License
+
+Copyright (c) 2020 Masaaki Goshima
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+================================================================
+
 github.com/google/uuid
 https://github.com/google/uuid
 ----------------------------------------------------------------
@@ -112,33 +139,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 ================================================================
 
-github.com/json-iterator/go
-https://github.com/json-iterator/go
-----------------------------------------------------------------
-MIT License
-
-Copyright (c) 2016 json-iterator
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is
-furnished to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
-
-THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-SOFTWARE.
-
-================================================================
-
 github.com/klauspost/compress
 https://github.com/klauspost/compress
 ----------------------------------------------------------------
@@ -685,683 +685,88 @@ https://github.com/minio/md5-simd
 
 ================================================================
 
-github.com/minio/sha256-simd
-https://github.com/minio/sha256-simd
+github.com/pmezard/go-difflib
+https://github.com/pmezard/go-difflib
 ----------------------------------------------------------------
+Copyright (c) 2013, Patrick Mezard
+All rights reserved.
 
-                                 Apache License
-                           Version 2.0, January 2004
-                        http://www.apache.org/licenses/
-
-   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
-   1. Definitions.
-
-      "License" shall mean the terms and conditions for use, reproduction,
-      and distribution as defined by Sections 1 through 9 of this document.
-
-      "Licensor" shall mean the copyright owner or entity authorized by
-      the copyright owner that is granting the License.
-
-      "Legal Entity" shall mean the union of the acting entity and all
-      other entities that control, are controlled by, or are under common
-      control with that entity. For the purposes of this definition,
-      "control" means (i) the power, direct or indirect, to cause the
-      direction or management of such entity, whether by contract or
-      otherwise, or (ii) ownership of fifty percent (50%) or more of the
-      outstanding shares, or (iii) beneficial ownership of such entity.
-
-      "You" (or "Your") shall mean an individual or Legal Entity
-      exercising permissions granted by this License.
-
-      "Source" form shall mean the preferred form for making modifications,
-      including but not limited to software source code, documentation
-      source, and configuration files.
-
-      "Object" form shall mean any form resulting from mechanical
-      transformation or translation of a Source form, including but
-      not limited to compiled object code, generated documentation,
-      and conversions to other media types.
-
-      "Work" shall mean the work of authorship, whether in Source or
-      Object form, made available under the License, as indicated by a
-      copyright notice that is included in or attached to the work
-      (an example is provided in the Appendix below).
-
-      "Derivative Works" shall mean any work, whether in Source or Object
-      form, that is based on (or derived from) the Work and for which the
-      editorial revisions, annotations, elaborations, or other modifications
-      represent, as a whole, an original work of authorship. For the purposes
-      of this License, Derivative Works shall not include works that remain
-      separable from, or merely link (or bind by name) to the interfaces of,
-      the Work and Derivative Works thereof.
-
-      "Contribution" shall mean any work of authorship, including
-      the original version of the Work and any modifications or additions
-      to that Work or Derivative Works thereof, that is intentionally
-      submitted to Licensor for inclusion in the Work by the copyright owner
-      or by an individual or Legal Entity authorized to submit on behalf of
-      the copyright owner. For the purposes of this definition, "submitted"
-      means any form of electronic, verbal, or written communication sent
-      to the Licensor or its representatives, including but not limited to
-      communication on electronic mailing lists, source code control systems,
-      and issue tracking systems that are managed by, or on behalf of, the
-      Licensor for the purpose of discussing and improving the Work, but
-      excluding communication that is conspicuously marked or otherwise
-      designated in writing by the copyright owner as "Not a Contribution."
-
-      "Contributor" shall mean Licensor and any individual or Legal Entity
-      on behalf of whom a Contribution has been received by Licensor and
-      subsequently incorporated within the Work.
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
 
-   2. Grant of Copyright License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      copyright license to reproduce, prepare Derivative Works of,
-      publicly display, publicly perform, sublicense, and distribute the
-      Work and such Derivative Works in Source or Object form.
+    Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+    Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+    The names of its contributors may not be used to endorse or promote
+products derived from this software without specific prior written
+permission.
 
-   3. Grant of Patent License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      (except as stated in this section) patent license to make, have made,
-      use, offer to sell, sell, import, and otherwise transfer the Work,
-      where such license applies only to those patent claims licensable
-      by such Contributor that are necessarily infringed by their
-      Contribution(s) alone or by combination of their Contribution(s)
-      with the Work to which such Contribution(s) was submitted. If You
-      institute patent litigation against any entity (including a
-      cross-claim or counterclaim in a lawsuit) alleging that the Work
-      or a Contribution incorporated within the Work constitutes direct
-      or contributory patent infringement, then any patent licenses
-      granted to You under this License for that Work shall terminate
-      as of the date such litigation is filed.
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-   4. Redistribution. You may reproduce and distribute copies of the
-      Work or Derivative Works thereof in any medium, with or without
-      modifications, and in Source or Object form, provided that You
-      meet the following conditions:
+================================================================
 
-      (a) You must give any other recipients of the Work or
-          Derivative Works a copy of this License; and
+github.com/rs/xid
+https://github.com/rs/xid
+----------------------------------------------------------------
+Copyright (c) 2015 Olivier Poitrey <rs@dailymotion.com>
 
-      (b) You must cause any modified files to carry prominent notices
-          stating that You changed the files; and
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is furnished
+to do so, subject to the following conditions:
 
-      (c) You must retain, in the Source form of any Derivative Works
-          that You distribute, all copyright, patent, trademark, and
-          attribution notices from the Source form of the Work,
-          excluding those notices that do not pertain to any part of
-          the Derivative Works; and
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
 
-      (d) If the Work includes a "NOTICE" text file as part of its
-          distribution, then any Derivative Works that You distribute must
-          include a readable copy of the attribution notices contained
-          within such NOTICE file, excluding those notices that do not
-          pertain to any part of the Derivative Works, in at least one
-          of the following places: within a NOTICE text file distributed
-          as part of the Derivative Works; within the Source form or
-          documentation, if provided along with the Derivative Works; or,
-          within a display generated by the Derivative Works, if and
-          wherever such third-party notices normally appear. The contents
-          of the NOTICE file are for informational purposes only and
-          do not modify the License. You may add Your own attribution
-          notices within Derivative Works that You distribute, alongside
-          or as an addendum to the NOTICE text from the Work, provided
-          that such additional attribution notices cannot be construed
-          as modifying the License.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
 
-      You may add Your own copyright statement to Your modifications and
-      may provide additional or different license terms and conditions
-      for use, reproduction, or distribution of Your modifications, or
-      for any such Derivative Works as a whole, provided Your use,
-      reproduction, and distribution of the Work otherwise complies with
-      the conditions stated in this License.
+================================================================
 
-   5. Submission of Contributions. Unless You explicitly state otherwise,
-      any Contribution intentionally submitted for inclusion in the Work
-      by You to the Licensor shall be under the terms and conditions of
-      this License, without any additional terms or conditions.
-      Notwithstanding the above, nothing herein shall supersede or modify
-      the terms of any separate license agreement you may have executed
-      with Licensor regarding such Contributions.
+github.com/stretchr/testify
+https://github.com/stretchr/testify
+----------------------------------------------------------------
+MIT License
 
-   6. Trademarks. This License does not grant permission to use the trade
-      names, trademarks, service marks, or product names of the Licensor,
-      except as required for reasonable and customary use in describing the
-      origin of the Work and reproducing the content of the NOTICE file.
+Copyright (c) 2012-2018 Mat Ryer and Tyler Bunnell
 
-   7. Disclaimer of Warranty. Unless required by applicable law or
-      agreed to in writing, Licensor provides the Work (and each
-      Contributor provides its Contributions) on an "AS IS" BASIS,
-      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-      implied, including, without limitation, any warranties or conditions
-      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
-      PARTICULAR PURPOSE. You are solely responsible for determining the
-      appropriateness of using or redistributing the Work and assume any
-      risks associated with Your exercise of permissions under this License.
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
 
-   8. Limitation of Liability. In no event and under no legal theory,
-      whether in tort (including negligence), contract, or otherwise,
-      unless required by applicable law (such as deliberate and grossly
-      negligent acts) or agreed to in writing, shall any Contributor be
-      liable to You for damages, including any direct, indirect, special,
-      incidental, or consequential damages of any character arising as a
-      result of this License or out of the use or inability to use the
-      Work (including but not limited to damages for loss of goodwill,
-      work stoppage, computer failure or malfunction, or any and all
-      other commercial damages or losses), even if such Contributor
-      has been advised of the possibility of such damages.
-
-   9. Accepting Warranty or Additional Liability. While redistributing
-      the Work or Derivative Works thereof, You may choose to offer,
-      and charge a fee for, acceptance of support, warranty, indemnity,
-      or other liability obligations and/or rights consistent with this
-      License. However, in accepting such obligations, You may act only
-      on Your own behalf and on Your sole responsibility, not on behalf
-      of any other Contributor, and only if You agree to indemnify,
-      defend, and hold each Contributor harmless for any liability
-      incurred by, or claims asserted against, such Contributor by reason
-      of your accepting any such warranty or additional liability.
-
-   END OF TERMS AND CONDITIONS
-
-   APPENDIX: How to apply the Apache License to your work.
-
-      To apply the Apache License to your work, attach the following
-      boilerplate notice, with the fields enclosed by brackets "[]"
-      replaced with your own identifying information. (Don't include
-      the brackets!)  The text should be enclosed in the appropriate
-      comment syntax for the file format. We also recommend that a
-      file or class name and description of purpose be included on the
-      same "printed page" as the copyright notice for easier
-      identification within third-party archives.
-
-   Copyright [yyyy] [name of copyright owner]
-
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
-
-================================================================
-
-github.com/modern-go/concurrent
-https://github.com/modern-go/concurrent
-----------------------------------------------------------------
-                                 Apache License
-                           Version 2.0, January 2004
-                        http://www.apache.org/licenses/
-
-   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
-   1. Definitions.
-
-      "License" shall mean the terms and conditions for use, reproduction,
-      and distribution as defined by Sections 1 through 9 of this document.
-
-      "Licensor" shall mean the copyright owner or entity authorized by
-      the copyright owner that is granting the License.
-
-      "Legal Entity" shall mean the union of the acting entity and all
-      other entities that control, are controlled by, or are under common
-      control with that entity. For the purposes of this definition,
-      "control" means (i) the power, direct or indirect, to cause the
-      direction or management of such entity, whether by contract or
-      otherwise, or (ii) ownership of fifty percent (50%) or more of the
-      outstanding shares, or (iii) beneficial ownership of such entity.
-
-      "You" (or "Your") shall mean an individual or Legal Entity
-      exercising permissions granted by this License.
-
-      "Source" form shall mean the preferred form for making modifications,
-      including but not limited to software source code, documentation
-      source, and configuration files.
-
-      "Object" form shall mean any form resulting from mechanical
-      transformation or translation of a Source form, including but
-      not limited to compiled object code, generated documentation,
-      and conversions to other media types.
-
-      "Work" shall mean the work of authorship, whether in Source or
-      Object form, made available under the License, as indicated by a
-      copyright notice that is included in or attached to the work
-      (an example is provided in the Appendix below).
-
-      "Derivative Works" shall mean any work, whether in Source or Object
-      form, that is based on (or derived from) the Work and for which the
-      editorial revisions, annotations, elaborations, or other modifications
-      represent, as a whole, an original work of authorship. For the purposes
-      of this License, Derivative Works shall not include works that remain
-      separable from, or merely link (or bind by name) to the interfaces of,
-      the Work and Derivative Works thereof.
-
-      "Contribution" shall mean any work of authorship, including
-      the original version of the Work and any modifications or additions
-      to that Work or Derivative Works thereof, that is intentionally
-      submitted to Licensor for inclusion in the Work by the copyright owner
-      or by an individual or Legal Entity authorized to submit on behalf of
-      the copyright owner. For the purposes of this definition, "submitted"
-      means any form of electronic, verbal, or written communication sent
-      to the Licensor or its representatives, including but not limited to
-      communication on electronic mailing lists, source code control systems,
-      and issue tracking systems that are managed by, or on behalf of, the
-      Licensor for the purpose of discussing and improving the Work, but
-      excluding communication that is conspicuously marked or otherwise
-      designated in writing by the copyright owner as "Not a Contribution."
-
-      "Contributor" shall mean Licensor and any individual or Legal Entity
-      on behalf of whom a Contribution has been received by Licensor and
-      subsequently incorporated within the Work.
-
-   2. Grant of Copyright License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      copyright license to reproduce, prepare Derivative Works of,
-      publicly display, publicly perform, sublicense, and distribute the
-      Work and such Derivative Works in Source or Object form.
-
-   3. Grant of Patent License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      (except as stated in this section) patent license to make, have made,
-      use, offer to sell, sell, import, and otherwise transfer the Work,
-      where such license applies only to those patent claims licensable
-      by such Contributor that are necessarily infringed by their
-      Contribution(s) alone or by combination of their Contribution(s)
-      with the Work to which such Contribution(s) was submitted. If You
-      institute patent litigation against any entity (including a
-      cross-claim or counterclaim in a lawsuit) alleging that the Work
-      or a Contribution incorporated within the Work constitutes direct
-      or contributory patent infringement, then any patent licenses
-      granted to You under this License for that Work shall terminate
-      as of the date such litigation is filed.
-
-   4. Redistribution. You may reproduce and distribute copies of the
-      Work or Derivative Works thereof in any medium, with or without
-      modifications, and in Source or Object form, provided that You
-      meet the following conditions:
-
-      (a) You must give any other recipients of the Work or
-          Derivative Works a copy of this License; and
-
-      (b) You must cause any modified files to carry prominent notices
-          stating that You changed the files; and
-
-      (c) You must retain, in the Source form of any Derivative Works
-          that You distribute, all copyright, patent, trademark, and
-          attribution notices from the Source form of the Work,
-          excluding those notices that do not pertain to any part of
-          the Derivative Works; and
-
-      (d) If the Work includes a "NOTICE" text file as part of its
-          distribution, then any Derivative Works that You distribute must
-          include a readable copy of the attribution notices contained
-          within such NOTICE file, excluding those notices that do not
-          pertain to any part of the Derivative Works, in at least one
-          of the following places: within a NOTICE text file distributed
-          as part of the Derivative Works; within the Source form or
-          documentation, if provided along with the Derivative Works; or,
-          within a display generated by the Derivative Works, if and
-          wherever such third-party notices normally appear. The contents
-          of the NOTICE file are for informational purposes only and
-          do not modify the License. You may add Your own attribution
-          notices within Derivative Works that You distribute, alongside
-          or as an addendum to the NOTICE text from the Work, provided
-          that such additional attribution notices cannot be construed
-          as modifying the License.
-
-      You may add Your own copyright statement to Your modifications and
-      may provide additional or different license terms and conditions
-      for use, reproduction, or distribution of Your modifications, or
-      for any such Derivative Works as a whole, provided Your use,
-      reproduction, and distribution of the Work otherwise complies with
-      the conditions stated in this License.
-
-   5. Submission of Contributions. Unless You explicitly state otherwise,
-      any Contribution intentionally submitted for inclusion in the Work
-      by You to the Licensor shall be under the terms and conditions of
-      this License, without any additional terms or conditions.
-      Notwithstanding the above, nothing herein shall supersede or modify
-      the terms of any separate license agreement you may have executed
-      with Licensor regarding such Contributions.
-
-   6. Trademarks. This License does not grant permission to use the trade
-      names, trademarks, service marks, or product names of the Licensor,
-      except as required for reasonable and customary use in describing the
-      origin of the Work and reproducing the content of the NOTICE file.
-
-   7. Disclaimer of Warranty. Unless required by applicable law or
-      agreed to in writing, Licensor provides the Work (and each
-      Contributor provides its Contributions) on an "AS IS" BASIS,
-      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-      implied, including, without limitation, any warranties or conditions
-      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
-      PARTICULAR PURPOSE. You are solely responsible for determining the
-      appropriateness of using or redistributing the Work and assume any
-      risks associated with Your exercise of permissions under this License.
-
-   8. Limitation of Liability. In no event and under no legal theory,
-      whether in tort (including negligence), contract, or otherwise,
-      unless required by applicable law (such as deliberate and grossly
-      negligent acts) or agreed to in writing, shall any Contributor be
-      liable to You for damages, including any direct, indirect, special,
-      incidental, or consequential damages of any character arising as a
-      result of this License or out of the use or inability to use the
-      Work (including but not limited to damages for loss of goodwill,
-      work stoppage, computer failure or malfunction, or any and all
-      other commercial damages or losses), even if such Contributor
-      has been advised of the possibility of such damages.
-
-   9. Accepting Warranty or Additional Liability. While redistributing
-      the Work or Derivative Works thereof, You may choose to offer,
-      and charge a fee for, acceptance of support, warranty, indemnity,
-      or other liability obligations and/or rights consistent with this
-      License. However, in accepting such obligations, You may act only
-      on Your own behalf and on Your sole responsibility, not on behalf
-      of any other Contributor, and only if You agree to indemnify,
-      defend, and hold each Contributor harmless for any liability
-      incurred by, or claims asserted against, such Contributor by reason
-      of your accepting any such warranty or additional liability.
-
-   END OF TERMS AND CONDITIONS
-
-   APPENDIX: How to apply the Apache License to your work.
-
-      To apply the Apache License to your work, attach the following
-      boilerplate notice, with the fields enclosed by brackets "[]"
-      replaced with your own identifying information. (Don't include
-      the brackets!)  The text should be enclosed in the appropriate
-      comment syntax for the file format. We also recommend that a
-      file or class name and description of purpose be included on the
-      same "printed page" as the copyright notice for easier
-      identification within third-party archives.
-
-   Copyright [yyyy] [name of copyright owner]
-
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
-
-================================================================
-
-github.com/modern-go/reflect2
-https://github.com/modern-go/reflect2
-----------------------------------------------------------------
-                                 Apache License
-                           Version 2.0, January 2004
-                        http://www.apache.org/licenses/
-
-   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
-   1. Definitions.
-
-      "License" shall mean the terms and conditions for use, reproduction,
-      and distribution as defined by Sections 1 through 9 of this document.
-
-      "Licensor" shall mean the copyright owner or entity authorized by
-      the copyright owner that is granting the License.
-
-      "Legal Entity" shall mean the union of the acting entity and all
-      other entities that control, are controlled by, or are under common
-      control with that entity. For the purposes of this definition,
-      "control" means (i) the power, direct or indirect, to cause the
-      direction or management of such entity, whether by contract or
-      otherwise, or (ii) ownership of fifty percent (50%) or more of the
-      outstanding shares, or (iii) beneficial ownership of such entity.
-
-      "You" (or "Your") shall mean an individual or Legal Entity
-      exercising permissions granted by this License.
-
-      "Source" form shall mean the preferred form for making modifications,
-      including but not limited to software source code, documentation
-      source, and configuration files.
-
-      "Object" form shall mean any form resulting from mechanical
-      transformation or translation of a Source form, including but
-      not limited to compiled object code, generated documentation,
-      and conversions to other media types.
-
-      "Work" shall mean the work of authorship, whether in Source or
-      Object form, made available under the License, as indicated by a
-      copyright notice that is included in or attached to the work
-      (an example is provided in the Appendix below).
-
-      "Derivative Works" shall mean any work, whether in Source or Object
-      form, that is based on (or derived from) the Work and for which the
-      editorial revisions, annotations, elaborations, or other modifications
-      represent, as a whole, an original work of authorship. For the purposes
-      of this License, Derivative Works shall not include works that remain
-      separable from, or merely link (or bind by name) to the interfaces of,
-      the Work and Derivative Works thereof.
-
-      "Contribution" shall mean any work of authorship, including
-      the original version of the Work and any modifications or additions
-      to that Work or Derivative Works thereof, that is intentionally
-      submitted to Licensor for inclusion in the Work by the copyright owner
-      or by an individual or Legal Entity authorized to submit on behalf of
-      the copyright owner. For the purposes of this definition, "submitted"
-      means any form of electronic, verbal, or written communication sent
-      to the Licensor or its representatives, including but not limited to
-      communication on electronic mailing lists, source code control systems,
-      and issue tracking systems that are managed by, or on behalf of, the
-      Licensor for the purpose of discussing and improving the Work, but
-      excluding communication that is conspicuously marked or otherwise
-      designated in writing by the copyright owner as "Not a Contribution."
-
-      "Contributor" shall mean Licensor and any individual or Legal Entity
-      on behalf of whom a Contribution has been received by Licensor and
-      subsequently incorporated within the Work.
-
-   2. Grant of Copyright License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      copyright license to reproduce, prepare Derivative Works of,
-      publicly display, publicly perform, sublicense, and distribute the
-      Work and such Derivative Works in Source or Object form.
-
-   3. Grant of Patent License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      (except as stated in this section) patent license to make, have made,
-      use, offer to sell, sell, import, and otherwise transfer the Work,
-      where such license applies only to those patent claims licensable
-      by such Contributor that are necessarily infringed by their
-      Contribution(s) alone or by combination of their Contribution(s)
-      with the Work to which such Contribution(s) was submitted. If You
-      institute patent litigation against any entity (including a
-      cross-claim or counterclaim in a lawsuit) alleging that the Work
-      or a Contribution incorporated within the Work constitutes direct
-      or contributory patent infringement, then any patent licenses
-      granted to You under this License for that Work shall terminate
-      as of the date such litigation is filed.
-
-   4. Redistribution. You may reproduce and distribute copies of the
-      Work or Derivative Works thereof in any medium, with or without
-      modifications, and in Source or Object form, provided that You
-      meet the following conditions:
-
-      (a) You must give any other recipients of the Work or
-          Derivative Works a copy of this License; and
-
-      (b) You must cause any modified files to carry prominent notices
-          stating that You changed the files; and
-
-      (c) You must retain, in the Source form of any Derivative Works
-          that You distribute, all copyright, patent, trademark, and
-          attribution notices from the Source form of the Work,
-          excluding those notices that do not pertain to any part of
-          the Derivative Works; and
-
-      (d) If the Work includes a "NOTICE" text file as part of its
-          distribution, then any Derivative Works that You distribute must
-          include a readable copy of the attribution notices contained
-          within such NOTICE file, excluding those notices that do not
-          pertain to any part of the Derivative Works, in at least one
-          of the following places: within a NOTICE text file distributed
-          as part of the Derivative Works; within the Source form or
-          documentation, if provided along with the Derivative Works; or,
-          within a display generated by the Derivative Works, if and
-          wherever such third-party notices normally appear. The contents
-          of the NOTICE file are for informational purposes only and
-          do not modify the License. You may add Your own attribution
-          notices within Derivative Works that You distribute, alongside
-          or as an addendum to the NOTICE text from the Work, provided
-          that such additional attribution notices cannot be construed
-          as modifying the License.
-
-      You may add Your own copyright statement to Your modifications and
-      may provide additional or different license terms and conditions
-      for use, reproduction, or distribution of Your modifications, or
-      for any such Derivative Works as a whole, provided Your use,
-      reproduction, and distribution of the Work otherwise complies with
-      the conditions stated in this License.
-
-   5. Submission of Contributions. Unless You explicitly state otherwise,
-      any Contribution intentionally submitted for inclusion in the Work
-      by You to the Licensor shall be under the terms and conditions of
-      this License, without any additional terms or conditions.
-      Notwithstanding the above, nothing herein shall supersede or modify
-      the terms of any separate license agreement you may have executed
-      with Licensor regarding such Contributions.
-
-   6. Trademarks. This License does not grant permission to use the trade
-      names, trademarks, service marks, or product names of the Licensor,
-      except as required for reasonable and customary use in describing the
-      origin of the Work and reproducing the content of the NOTICE file.
-
-   7. Disclaimer of Warranty. Unless required by applicable law or
-      agreed to in writing, Licensor provides the Work (and each
-      Contributor provides its Contributions) on an "AS IS" BASIS,
-      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-      implied, including, without limitation, any warranties or conditions
-      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
-      PARTICULAR PURPOSE. You are solely responsible for determining the
-      appropriateness of using or redistributing the Work and assume any
-      risks associated with Your exercise of permissions under this License.
-
-   8. Limitation of Liability. In no event and under no legal theory,
-      whether in tort (including negligence), contract, or otherwise,
-      unless required by applicable law (such as deliberate and grossly
-      negligent acts) or agreed to in writing, shall any Contributor be
-      liable to You for damages, including any direct, indirect, special,
-      incidental, or consequential damages of any character arising as a
-      result of this License or out of the use or inability to use the
-      Work (including but not limited to damages for loss of goodwill,
-      work stoppage, computer failure or malfunction, or any and all
-      other commercial damages or losses), even if such Contributor
-      has been advised of the possibility of such damages.
-
-   9. Accepting Warranty or Additional Liability. While redistributing
-      the Work or Derivative Works thereof, You may choose to offer,
-      and charge a fee for, acceptance of support, warranty, indemnity,
-      or other liability obligations and/or rights consistent with this
-      License. However, in accepting such obligations, You may act only
-      on Your own behalf and on Your sole responsibility, not on behalf
-      of any other Contributor, and only if You agree to indemnify,
-      defend, and hold each Contributor harmless for any liability
-      incurred by, or claims asserted against, such Contributor by reason
-      of your accepting any such warranty or additional liability.
-
-   END OF TERMS AND CONDITIONS
-
-   APPENDIX: How to apply the Apache License to your work.
-
-      To apply the Apache License to your work, attach the following
-      boilerplate notice, with the fields enclosed by brackets "[]"
-      replaced with your own identifying information. (Don't include
-      the brackets!)  The text should be enclosed in the appropriate
-      comment syntax for the file format. We also recommend that a
-      file or class name and description of purpose be included on the
-      same "printed page" as the copyright notice for easier
-      identification within third-party archives.
-
-   Copyright [yyyy] [name of copyright owner]
-
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
-
-================================================================
-
-github.com/pmezard/go-difflib
-https://github.com/pmezard/go-difflib
-----------------------------------------------------------------
-Copyright (c) 2013, Patrick Mezard
-All rights reserved.
-
-Redistribution and use in source and binary forms, with or without
-modification, are permitted provided that the following conditions are
-met:
-
-    Redistributions of source code must retain the above copyright
-notice, this list of conditions and the following disclaimer.
-    Redistributions in binary form must reproduce the above copyright
-notice, this list of conditions and the following disclaimer in the
-documentation and/or other materials provided with the distribution.
-    The names of its contributors may not be used to endorse or promote
-products derived from this software without specific prior written
-permission.
-
-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
-IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
-TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
-PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
-TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
-PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
-LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
-NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
-SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-================================================================
-
-github.com/rs/xid
-https://github.com/rs/xid
-----------------------------------------------------------------
-Copyright (c) 2015 Olivier Poitrey <rs@dailymotion.com>
-
-Permission is hereby granted, free of charge, to any person obtaining a copy
-of this software and associated documentation files (the "Software"), to deal
-in the Software without restriction, including without limitation the rights
-to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
-copies of the Software, and to permit persons to whom the Software is furnished
-to do so, subject to the following conditions:
-
-The above copyright notice and this permission notice shall be included in all
-copies or substantial portions of the Software.
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
 
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-THE SOFTWARE.
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
 
 ================================================================
 
diff --git a/vendor/github.com/minio/minio-go/v7/Makefile b/vendor/github.com/minio/minio-go/v7/Makefile
index 68444aa6..9e4ddc4c 100644
--- a/vendor/github.com/minio/minio-go/v7/Makefile
+++ b/vendor/github.com/minio/minio-go/v7/Makefile
@@ -32,6 +32,10 @@ functional-test:
 	@GO111MODULE=on go build -race functional_tests.go
 	@SERVER_ENDPOINT=localhost:9000 ACCESS_KEY=minioadmin SECRET_KEY=minioadmin ENABLE_HTTPS=1 MINT_MODE=full ./functional_tests
 
+functional-test-notls:
+	@GO111MODULE=on go build -race functional_tests.go
+	@SERVER_ENDPOINT=localhost:9000 ACCESS_KEY=minioadmin SECRET_KEY=minioadmin ENABLE_HTTPS=0 MINT_MODE=full ./functional_tests
+
 clean:
 	@echo "Cleaning up all the generated files"
 	@find . -name '*.test' | xargs rm -fv
diff --git a/vendor/github.com/minio/minio-go/v7/README.md b/vendor/github.com/minio/minio-go/v7/README.md
index 82f70a13..be7963c5 100644
--- a/vendor/github.com/minio/minio-go/v7/README.md
+++ b/vendor/github.com/minio/minio-go/v7/README.md
@@ -253,7 +253,7 @@ The full API Reference is available here.
 
 * [setbucketencryption.go](https://github.com/minio/minio-go/blob/master/examples/s3/setbucketencryption.go)
 * [getbucketencryption.go](https://github.com/minio/minio-go/blob/master/examples/s3/getbucketencryption.go)
-* [deletebucketencryption.go](https://github.com/minio/minio-go/blob/master/examples/s3/deletebucketencryption.go)
+* [removebucketencryption.go](https://github.com/minio/minio-go/blob/master/examples/s3/removebucketencryption.go)
 
 ### Full Examples : Bucket replication Operations
 
diff --git a/vendor/github.com/minio/minio-go/v7/api-bucket-cors.go b/vendor/github.com/minio/minio-go/v7/api-bucket-cors.go
new file mode 100644
index 00000000..8bf537f7
--- /dev/null
+++ b/vendor/github.com/minio/minio-go/v7/api-bucket-cors.go
@@ -0,0 +1,136 @@
+/*
+ * MinIO Go Library for Amazon S3 Compatible Cloud Storage
+ * Copyright 2024 MinIO, Inc.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package minio
+
+import (
+	"bytes"
+	"context"
+	"net/http"
+	"net/url"
+
+	"github.com/minio/minio-go/v7/pkg/cors"
+	"github.com/minio/minio-go/v7/pkg/s3utils"
+)
+
+// SetBucketCors sets the cors configuration for the bucket
+func (c *Client) SetBucketCors(ctx context.Context, bucketName string, corsConfig *cors.Config) error {
+	if err := s3utils.CheckValidBucketName(bucketName); err != nil {
+		return err
+	}
+
+	if corsConfig == nil {
+		return c.removeBucketCors(ctx, bucketName)
+	}
+
+	return c.putBucketCors(ctx, bucketName, corsConfig)
+}
+
+func (c *Client) putBucketCors(ctx context.Context, bucketName string, corsConfig *cors.Config) error {
+	urlValues := make(url.Values)
+	urlValues.Set("cors", "")
+
+	corsStr, err := corsConfig.ToXML()
+	if err != nil {
+		return err
+	}
+
+	reqMetadata := requestMetadata{
+		bucketName:       bucketName,
+		queryValues:      urlValues,
+		contentBody:      bytes.NewReader(corsStr),
+		contentLength:    int64(len(corsStr)),
+		contentMD5Base64: sumMD5Base64([]byte(corsStr)),
+	}
+
+	resp, err := c.executeMethod(ctx, http.MethodPut, reqMetadata)
+	defer closeResponse(resp)
+	if err != nil {
+		return err
+	}
+	if resp != nil {
+		if resp.StatusCode != http.StatusOK && resp.StatusCode != http.StatusNoContent {
+			return httpRespToErrorResponse(resp, bucketName, "")
+		}
+	}
+	return nil
+}
+
+func (c *Client) removeBucketCors(ctx context.Context, bucketName string) error {
+	urlValues := make(url.Values)
+	urlValues.Set("cors", "")
+
+	resp, err := c.executeMethod(ctx, http.MethodDelete, requestMetadata{
+		bucketName:       bucketName,
+		queryValues:      urlValues,
+		contentSHA256Hex: emptySHA256Hex,
+	})
+	defer closeResponse(resp)
+	if err != nil {
+		return err
+	}
+
+	if resp.StatusCode != http.StatusNoContent {
+		return httpRespToErrorResponse(resp, bucketName, "")
+	}
+
+	return nil
+}
+
+// GetBucketCors returns the current cors
+func (c *Client) GetBucketCors(ctx context.Context, bucketName string) (*cors.Config, error) {
+	if err := s3utils.CheckValidBucketName(bucketName); err != nil {
+		return nil, err
+	}
+	bucketCors, err := c.getBucketCors(ctx, bucketName)
+	if err != nil {
+		errResponse := ToErrorResponse(err)
+		if errResponse.Code == "NoSuchCORSConfiguration" {
+			return nil, nil
+		}
+		return nil, err
+	}
+	return bucketCors, nil
+}
+
+func (c *Client) getBucketCors(ctx context.Context, bucketName string) (*cors.Config, error) {
+	urlValues := make(url.Values)
+	urlValues.Set("cors", "")
+
+	resp, err := c.executeMethod(ctx, http.MethodGet, requestMetadata{
+		bucketName:       bucketName,
+		queryValues:      urlValues,
+		contentSHA256Hex: emptySHA256Hex, // TODO: needed? copied over from other example, but not spec'd in API.
+	})
+
+	defer closeResponse(resp)
+	if err != nil {
+		return nil, err
+	}
+
+	if resp != nil {
+		if resp.StatusCode != http.StatusOK {
+			return nil, httpRespToErrorResponse(resp, bucketName, "")
+		}
+	}
+
+	corsConfig, err := cors.ParseBucketCorsConfig(resp.Body)
+	if err != nil {
+		return nil, err
+	}
+
+	return corsConfig, nil
+}
diff --git a/vendor/github.com/minio/minio-go/v7/api-bucket-notification.go b/vendor/github.com/minio/minio-go/v7/api-bucket-notification.go
index 8de5c010..ad8eada4 100644
--- a/vendor/github.com/minio/minio-go/v7/api-bucket-notification.go
+++ b/vendor/github.com/minio/minio-go/v7/api-bucket-notification.go
@@ -26,7 +26,7 @@ import (
 	"net/url"
 	"time"
 
-	jsoniter "github.com/json-iterator/go"
+	"github.com/goccy/go-json"
 	"github.com/minio/minio-go/v7/pkg/notification"
 	"github.com/minio/minio-go/v7/pkg/s3utils"
 )
@@ -207,7 +207,6 @@ func (c *Client) ListenBucketNotification(ctx context.Context, bucketName, prefi
 			// Use a higher buffer to support unexpected
 			// caching done by proxies
 			bio.Buffer(notificationEventBuffer, notificationCapacity)
-			json := jsoniter.ConfigCompatibleWithStandardLibrary
 
 			// Unmarshal each line, returns marshaled values.
 			for bio.Scan() {
diff --git a/vendor/github.com/minio/minio-go/v7/api-compose-object.go b/vendor/github.com/minio/minio-go/v7/api-compose-object.go
index 8c12c355..bb595626 100644
--- a/vendor/github.com/minio/minio-go/v7/api-compose-object.go
+++ b/vendor/github.com/minio/minio-go/v7/api-compose-object.go
@@ -119,7 +119,7 @@ func (opts CopyDestOptions) Marshal(header http.Header) {
 	if opts.ReplaceMetadata {
 		header.Set("x-amz-metadata-directive", replaceDirective)
 		for k, v := range filterCustomMeta(opts.UserMetadata) {
-			if isAmzHeader(k) || isStandardHeader(k) || isStorageClassHeader(k) || isValidReplicationEncryptionHeader(k) {
+			if isAmzHeader(k) || isStandardHeader(k) || isStorageClassHeader(k) || isMinioHeader(k) {
 				header.Set(k, v)
 			} else {
 				header.Set("x-amz-meta-"+k, v)
diff --git a/vendor/github.com/minio/minio-go/v7/api-datatypes.go b/vendor/github.com/minio/minio-go/v7/api-datatypes.go
index 97a6f80b..8a8fd889 100644
--- a/vendor/github.com/minio/minio-go/v7/api-datatypes.go
+++ b/vendor/github.com/minio/minio-go/v7/api-datatypes.go
@@ -143,10 +143,11 @@ type UploadInfo struct {
 	// Verified checksum values, if any.
 	// Values are base64 (standard) encoded.
 	// For multipart objects this is a checksum of the checksum of each part.
-	ChecksumCRC32  string
-	ChecksumCRC32C string
-	ChecksumSHA1   string
-	ChecksumSHA256 string
+	ChecksumCRC32     string
+	ChecksumCRC32C    string
+	ChecksumSHA1      string
+	ChecksumSHA256    string
+	ChecksumCRC64NVME string
 }
 
 // RestoreInfo contains information of the restore operation of an archived object
@@ -215,10 +216,11 @@ type ObjectInfo struct {
 	Restore *RestoreInfo
 
 	// Checksum values
-	ChecksumCRC32  string
-	ChecksumCRC32C string
-	ChecksumSHA1   string
-	ChecksumSHA256 string
+	ChecksumCRC32     string
+	ChecksumCRC32C    string
+	ChecksumSHA1      string
+	ChecksumSHA256    string
+	ChecksumCRC64NVME string
 
 	Internal *struct {
 		K int // Data blocks
diff --git a/vendor/github.com/minio/minio-go/v7/api-get-object-file.go b/vendor/github.com/minio/minio-go/v7/api-get-object-file.go
index 2332dbf1..567a42e4 100644
--- a/vendor/github.com/minio/minio-go/v7/api-get-object-file.go
+++ b/vendor/github.com/minio/minio-go/v7/api-get-object-file.go
@@ -69,7 +69,7 @@ func (c *Client) FGetObject(ctx context.Context, bucketName, objectName, filePat
 	}
 
 	// Write to a temporary file "fileName.part.minio" before saving.
-	filePartPath := filePath + objectStat.ETag + ".part.minio"
+	filePartPath := filePath + sum256Hex([]byte(objectStat.ETag)) + ".part.minio"
 
 	// If exists, open in append mode. If not create it as a part file.
 	filePart, err := os.OpenFile(filePartPath, os.O_CREATE|os.O_APPEND|os.O_WRONLY, 0o600)
diff --git a/vendor/github.com/minio/minio-go/v7/api-get-object.go b/vendor/github.com/minio/minio-go/v7/api-get-object.go
index 9e6b1543..5cc85f61 100644
--- a/vendor/github.com/minio/minio-go/v7/api-get-object.go
+++ b/vendor/github.com/minio/minio-go/v7/api-get-object.go
@@ -32,10 +32,18 @@ import (
 func (c *Client) GetObject(ctx context.Context, bucketName, objectName string, opts GetObjectOptions) (*Object, error) {
 	// Input validation.
 	if err := s3utils.CheckValidBucketName(bucketName); err != nil {
-		return nil, err
+		return nil, ErrorResponse{
+			StatusCode: http.StatusBadRequest,
+			Code:       "InvalidBucketName",
+			Message:    err.Error(),
+		}
 	}
 	if err := s3utils.CheckValidObjectName(objectName); err != nil {
-		return nil, err
+		return nil, ErrorResponse{
+			StatusCode: http.StatusBadRequest,
+			Code:       "XMinioInvalidObjectName",
+			Message:    err.Error(),
+		}
 	}
 
 	gctx, cancel := context.WithCancel(ctx)
@@ -310,7 +318,7 @@ func (o *Object) doGetRequest(request getRequest) (getResponse, error) {
 	response := <-o.resCh
 
 	// Return any error to the top level.
-	if response.Error != nil {
+	if response.Error != nil && response.Error != io.EOF {
 		return response, response.Error
 	}
 
@@ -332,7 +340,7 @@ func (o *Object) doGetRequest(request getRequest) (getResponse, error) {
 	// Data are ready on the wire, no need to reinitiate connection in lower level
 	o.seekData = false
 
-	return response, nil
+	return response, response.Error
 }
 
 // setOffset - handles the setting of offsets for
@@ -649,10 +657,18 @@ func newObject(ctx context.Context, cancel context.CancelFunc, reqCh chan<- getR
 func (c *Client) getObject(ctx context.Context, bucketName, objectName string, opts GetObjectOptions) (io.ReadCloser, ObjectInfo, http.Header, error) {
 	// Validate input arguments.
 	if err := s3utils.CheckValidBucketName(bucketName); err != nil {
-		return nil, ObjectInfo{}, nil, err
+		return nil, ObjectInfo{}, nil, ErrorResponse{
+			StatusCode: http.StatusBadRequest,
+			Code:       "InvalidBucketName",
+			Message:    err.Error(),
+		}
 	}
 	if err := s3utils.CheckValidObjectName(objectName); err != nil {
-		return nil, ObjectInfo{}, nil, err
+		return nil, ObjectInfo{}, nil, ErrorResponse{
+			StatusCode: http.StatusBadRequest,
+			Code:       "XMinioInvalidObjectName",
+			Message:    err.Error(),
+		}
 	}
 
 	// Execute GET on objectName.
diff --git a/vendor/github.com/minio/minio-go/v7/api-prompt-object.go b/vendor/github.com/minio/minio-go/v7/api-prompt-object.go
new file mode 100644
index 00000000..dac062a7
--- /dev/null
+++ b/vendor/github.com/minio/minio-go/v7/api-prompt-object.go
@@ -0,0 +1,78 @@
+/*
+ * MinIO Go Library for Amazon S3 Compatible Cloud Storage
+ * Copyright 2015-2024 MinIO, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package minio
+
+import (
+	"bytes"
+	"context"
+	"io"
+	"net/http"
+
+	"github.com/goccy/go-json"
+	"github.com/minio/minio-go/v7/pkg/s3utils"
+)
+
+// PromptObject performs language model inference with the prompt and referenced object as context.
+// Inference is performed using a Lambda handler that can process the prompt and object.
+// Currently, this functionality is limited to certain MinIO servers.
+func (c *Client) PromptObject(ctx context.Context, bucketName, objectName, prompt string, opts PromptObjectOptions) (io.ReadCloser, error) {
+	// Input validation.
+	if err := s3utils.CheckValidBucketName(bucketName); err != nil {
+		return nil, ErrorResponse{
+			StatusCode: http.StatusBadRequest,
+			Code:       "InvalidBucketName",
+			Message:    err.Error(),
+		}
+	}
+	if err := s3utils.CheckValidObjectName(objectName); err != nil {
+		return nil, ErrorResponse{
+			StatusCode: http.StatusBadRequest,
+			Code:       "XMinioInvalidObjectName",
+			Message:    err.Error(),
+		}
+	}
+
+	opts.AddLambdaArnToReqParams(opts.LambdaArn)
+	opts.SetHeader("Content-Type", "application/json")
+	opts.AddPromptArg("prompt", prompt)
+	promptReqBytes, err := json.Marshal(opts.PromptArgs)
+	if err != nil {
+		return nil, err
+	}
+
+	// Execute POST on bucket/object.
+	resp, err := c.executeMethod(ctx, http.MethodPost, requestMetadata{
+		bucketName:       bucketName,
+		objectName:       objectName,
+		queryValues:      opts.toQueryValues(),
+		customHeader:     opts.Header(),
+		contentSHA256Hex: sum256Hex(promptReqBytes),
+		contentBody:      bytes.NewReader(promptReqBytes),
+		contentLength:    int64(len(promptReqBytes)),
+	})
+	if err != nil {
+		return nil, err
+	}
+
+	if resp.StatusCode != http.StatusOK {
+		defer closeResponse(resp)
+		return nil, httpRespToErrorResponse(resp, bucketName, objectName)
+	}
+
+	return resp.Body, nil
+}
diff --git a/vendor/github.com/minio/minio-go/v7/api-prompt-options.go b/vendor/github.com/minio/minio-go/v7/api-prompt-options.go
new file mode 100644
index 00000000..4493a75d
--- /dev/null
+++ b/vendor/github.com/minio/minio-go/v7/api-prompt-options.go
@@ -0,0 +1,84 @@
+/*
+ * MinIO Go Library for Amazon S3 Compatible Cloud Storage
+ * Copyright 2015-2024 MinIO, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package minio
+
+import (
+	"net/http"
+	"net/url"
+)
+
+// PromptObjectOptions provides options to PromptObject call.
+// LambdaArn is the ARN of the Prompt Lambda to be invoked.
+// PromptArgs is a map of key-value pairs to be passed to the inference action on the Prompt Lambda.
+// "prompt" is a reserved key and should not be used as a key in PromptArgs.
+type PromptObjectOptions struct {
+	LambdaArn  string
+	PromptArgs map[string]any
+	headers    map[string]string
+	reqParams  url.Values
+}
+
+// Header returns the http.Header representation of the POST options.
+func (o PromptObjectOptions) Header() http.Header {
+	headers := make(http.Header, len(o.headers))
+	for k, v := range o.headers {
+		headers.Set(k, v)
+	}
+	return headers
+}
+
+// AddPromptArg Add a key value pair to the prompt arguments where the key is a string and
+// the value is a JSON serializable.
+func (o *PromptObjectOptions) AddPromptArg(key string, value any) {
+	if o.PromptArgs == nil {
+		o.PromptArgs = make(map[string]any)
+	}
+	o.PromptArgs[key] = value
+}
+
+// AddLambdaArnToReqParams adds the lambdaArn to the request query string parameters.
+func (o *PromptObjectOptions) AddLambdaArnToReqParams(lambdaArn string) {
+	if o.reqParams == nil {
+		o.reqParams = make(url.Values)
+	}
+	o.reqParams.Add("lambdaArn", lambdaArn)
+}
+
+// SetHeader adds a key value pair to the options. The
+// key-value pair will be part of the HTTP POST request
+// headers.
+func (o *PromptObjectOptions) SetHeader(key, value string) {
+	if o.headers == nil {
+		o.headers = make(map[string]string)
+	}
+	o.headers[http.CanonicalHeaderKey(key)] = value
+}
+
+// toQueryValues - Convert the reqParams in Options to query string parameters.
+func (o *PromptObjectOptions) toQueryValues() url.Values {
+	urlValues := make(url.Values)
+	if o.reqParams != nil {
+		for key, values := range o.reqParams {
+			for _, value := range values {
+				urlValues.Add(key, value)
+			}
+		}
+	}
+
+	return urlValues
+}
diff --git a/vendor/github.com/minio/minio-go/v7/api-put-object-fan-out.go b/vendor/github.com/minio/minio-go/v7/api-put-object-fan-out.go
index 0ae9142e..3023b949 100644
--- a/vendor/github.com/minio/minio-go/v7/api-put-object-fan-out.go
+++ b/vendor/github.com/minio/minio-go/v7/api-put-object-fan-out.go
@@ -85,7 +85,10 @@ func (c *Client) PutObjectFanOut(ctx context.Context, bucket string, fanOutData
 	policy.SetEncryption(fanOutReq.SSE)
 
 	// Set checksum headers if any.
-	policy.SetChecksum(fanOutReq.Checksum)
+	err := policy.SetChecksum(fanOutReq.Checksum)
+	if err != nil {
+		return nil, err
+	}
 
 	url, formData, err := c.PresignedPostPolicy(ctx, policy)
 	if err != nil {
diff --git a/vendor/github.com/minio/minio-go/v7/api-put-object-multipart.go b/vendor/github.com/minio/minio-go/v7/api-put-object-multipart.go
index 5f117afa..03bd34f7 100644
--- a/vendor/github.com/minio/minio-go/v7/api-put-object-multipart.go
+++ b/vendor/github.com/minio/minio-go/v7/api-put-object-multipart.go
@@ -24,7 +24,6 @@ import (
 	"encoding/hex"
 	"encoding/xml"
 	"fmt"
-	"hash/crc32"
 	"io"
 	"net/http"
 	"net/url"
@@ -84,10 +83,7 @@ func (c *Client) putObjectMultipartNoStream(ctx context.Context, bucketName, obj
 	// HTTPS connection.
 	hashAlgos, hashSums := c.hashMaterials(opts.SendContentMd5, !opts.DisableContentSha256)
 	if len(hashSums) == 0 {
-		if opts.UserMetadata == nil {
-			opts.UserMetadata = make(map[string]string, 1)
-		}
-		opts.UserMetadata["X-Amz-Checksum-Algorithm"] = "CRC32C"
+		addAutoChecksumHeaders(&opts)
 	}
 
 	// Initiate a new multipart upload.
@@ -114,9 +110,8 @@ func (c *Client) putObjectMultipartNoStream(ctx context.Context, bucketName, obj
 
 	// Create checksums
 	// CRC32C is ~50% faster on AMD64 @ 30GB/s
-	var crcBytes []byte
 	customHeader := make(http.Header)
-	crc := crc32.New(crc32.MakeTable(crc32.Castagnoli))
+	crc := opts.AutoChecksum.Hasher()
 	for partNumber <= totalPartsCount {
 		length, rErr := readFull(reader, buf)
 		if rErr == io.EOF && partNumber > 1 {
@@ -154,8 +149,7 @@ func (c *Client) putObjectMultipartNoStream(ctx context.Context, bucketName, obj
 			crc.Reset()
 			crc.Write(buf[:length])
 			cSum := crc.Sum(nil)
-			customHeader.Set("x-amz-checksum-crc32c", base64.StdEncoding.EncodeToString(cSum))
-			crcBytes = append(crcBytes, cSum...)
+			customHeader.Set(opts.AutoChecksum.Key(), base64.StdEncoding.EncodeToString(cSum))
 		}
 
 		p := uploadPartParams{bucketName: bucketName, objectName: objectName, uploadID: uploadID, reader: rd, partNumber: partNumber, md5Base64: md5Base64, sha256Hex: sha256Hex, size: int64(length), sse: opts.ServerSideEncryption, streamSha256: !opts.DisableContentSha256, customHeader: customHeader}
@@ -183,18 +177,21 @@ func (c *Client) putObjectMultipartNoStream(ctx context.Context, bucketName, obj
 
 	// Loop over total uploaded parts to save them in
 	// Parts array before completing the multipart request.
+	allParts := make([]ObjectPart, 0, len(partsInfo))
 	for i := 1; i < partNumber; i++ {
 		part, ok := partsInfo[i]
 		if !ok {
 			return UploadInfo{}, errInvalidArgument(fmt.Sprintf("Missing part number %d", i))
 		}
+		allParts = append(allParts, part)
 		complMultipartUpload.Parts = append(complMultipartUpload.Parts, CompletePart{
-			ETag:           part.ETag,
-			PartNumber:     part.PartNumber,
-			ChecksumCRC32:  part.ChecksumCRC32,
-			ChecksumCRC32C: part.ChecksumCRC32C,
-			ChecksumSHA1:   part.ChecksumSHA1,
-			ChecksumSHA256: part.ChecksumSHA256,
+			ETag:              part.ETag,
+			PartNumber:        part.PartNumber,
+			ChecksumCRC32:     part.ChecksumCRC32,
+			ChecksumCRC32C:    part.ChecksumCRC32C,
+			ChecksumSHA1:      part.ChecksumSHA1,
+			ChecksumSHA256:    part.ChecksumSHA256,
+			ChecksumCRC64NVME: part.ChecksumCRC64NVME,
 		})
 	}
 
@@ -202,13 +199,10 @@ func (c *Client) putObjectMultipartNoStream(ctx context.Context, bucketName, obj
 	sort.Sort(completedParts(complMultipartUpload.Parts))
 	opts = PutObjectOptions{
 		ServerSideEncryption: opts.ServerSideEncryption,
+		AutoChecksum:         opts.AutoChecksum,
 	}
-	if len(crcBytes) > 0 {
-		// Add hash of hashes.
-		crc.Reset()
-		crc.Write(crcBytes)
-		opts.UserMetadata = map[string]string{"X-Amz-Checksum-Crc32c": base64.StdEncoding.EncodeToString(crc.Sum(nil))}
-	}
+	applyAutoChecksum(&opts, allParts)
+
 	uploadInfo, err := c.completeMultipartUpload(ctx, bucketName, objectName, uploadID, complMultipartUpload, opts)
 	if err != nil {
 		return UploadInfo{}, err
@@ -354,10 +348,11 @@ func (c *Client) uploadPart(ctx context.Context, p uploadPartParams) (ObjectPart
 	// Once successfully uploaded, return completed part.
 	h := resp.Header
 	objPart := ObjectPart{
-		ChecksumCRC32:  h.Get("x-amz-checksum-crc32"),
-		ChecksumCRC32C: h.Get("x-amz-checksum-crc32c"),
-		ChecksumSHA1:   h.Get("x-amz-checksum-sha1"),
-		ChecksumSHA256: h.Get("x-amz-checksum-sha256"),
+		ChecksumCRC32:     h.Get(ChecksumCRC32.Key()),
+		ChecksumCRC32C:    h.Get(ChecksumCRC32C.Key()),
+		ChecksumSHA1:      h.Get(ChecksumSHA1.Key()),
+		ChecksumSHA256:    h.Get(ChecksumSHA256.Key()),
+		ChecksumCRC64NVME: h.Get(ChecksumCRC64NVME.Key()),
 	}
 	objPart.Size = p.size
 	objPart.PartNumber = p.partNumber
@@ -457,9 +452,10 @@ func (c *Client) completeMultipartUpload(ctx context.Context, bucketName, object
 		Expiration:       expTime,
 		ExpirationRuleID: ruleID,
 
-		ChecksumSHA256: completeMultipartUploadResult.ChecksumSHA256,
-		ChecksumSHA1:   completeMultipartUploadResult.ChecksumSHA1,
-		ChecksumCRC32:  completeMultipartUploadResult.ChecksumCRC32,
-		ChecksumCRC32C: completeMultipartUploadResult.ChecksumCRC32C,
+		ChecksumSHA256:    completeMultipartUploadResult.ChecksumSHA256,
+		ChecksumSHA1:      completeMultipartUploadResult.ChecksumSHA1,
+		ChecksumCRC32:     completeMultipartUploadResult.ChecksumCRC32,
+		ChecksumCRC32C:    completeMultipartUploadResult.ChecksumCRC32C,
+		ChecksumCRC64NVME: completeMultipartUploadResult.ChecksumCRC64NVME,
 	}, nil
 }
diff --git a/vendor/github.com/minio/minio-go/v7/api-put-object-streaming.go b/vendor/github.com/minio/minio-go/v7/api-put-object-streaming.go
index 9182d4ea..3ff3b69e 100644
--- a/vendor/github.com/minio/minio-go/v7/api-put-object-streaming.go
+++ b/vendor/github.com/minio/minio-go/v7/api-put-object-streaming.go
@@ -22,7 +22,6 @@ import (
 	"context"
 	"encoding/base64"
 	"fmt"
-	"hash/crc32"
 	"io"
 	"net/http"
 	"net/url"
@@ -53,7 +52,7 @@ func (c *Client) putObjectMultipartStream(ctx context.Context, bucketName, objec
 	} else {
 		info, err = c.putObjectMultipartStreamOptionalChecksum(ctx, bucketName, objectName, reader, size, opts)
 	}
-	if err != nil {
+	if err != nil && s3utils.IsGoogleEndpoint(*c.endpointURL) {
 		errResp := ToErrorResponse(err)
 		// Verify if multipart functionality is not available, if not
 		// fall back to single PutObject operation.
@@ -109,13 +108,12 @@ func (c *Client) putObjectMultipartStreamFromReadAt(ctx context.Context, bucketN
 	if err != nil {
 		return UploadInfo{}, err
 	}
-
+	if opts.Checksum.IsSet() {
+		opts.AutoChecksum = opts.Checksum
+	}
 	withChecksum := c.trailingHeaderSupport
 	if withChecksum {
-		if opts.UserMetadata == nil {
-			opts.UserMetadata = make(map[string]string, 1)
-		}
-		opts.UserMetadata["X-Amz-Checksum-Algorithm"] = "CRC32C"
+		addAutoChecksumHeaders(&opts)
 	}
 	// Initiate a new multipart upload.
 	uploadID, err := c.newUploadID(ctx, bucketName, objectName, opts)
@@ -195,10 +193,10 @@ func (c *Client) putObjectMultipartStreamFromReadAt(ctx context.Context, bucketN
 				sectionReader := newHook(io.NewSectionReader(reader, readOffset, partSize), opts.Progress)
 				trailer := make(http.Header, 1)
 				if withChecksum {
-					crc := crc32.New(crc32.MakeTable(crc32.Castagnoli))
-					trailer.Set("x-amz-checksum-crc32c", base64.StdEncoding.EncodeToString(crc.Sum(nil)))
+					crc := opts.AutoChecksum.Hasher()
+					trailer.Set(opts.AutoChecksum.Key(), base64.StdEncoding.EncodeToString(crc.Sum(nil)))
 					sectionReader = newHashReaderWrapper(sectionReader, crc, func(hash []byte) {
-						trailer.Set("x-amz-checksum-crc32c", base64.StdEncoding.EncodeToString(hash))
+						trailer.Set(opts.AutoChecksum.Key(), base64.StdEncoding.EncodeToString(hash))
 					})
 				}
 
@@ -239,6 +237,7 @@ func (c *Client) putObjectMultipartStreamFromReadAt(ctx context.Context, bucketN
 
 	// Gather the responses as they occur and update any
 	// progress bar.
+	allParts := make([]ObjectPart, 0, totalPartsCount)
 	for u := 1; u <= totalPartsCount; u++ {
 		select {
 		case <-ctx.Done():
@@ -247,16 +246,17 @@ func (c *Client) putObjectMultipartStreamFromReadAt(ctx context.Context, bucketN
 			if uploadRes.Error != nil {
 				return UploadInfo{}, uploadRes.Error
 			}
-
+			allParts = append(allParts, uploadRes.Part)
 			// Update the totalUploadedSize.
 			totalUploadedSize += uploadRes.Size
 			complMultipartUpload.Parts = append(complMultipartUpload.Parts, CompletePart{
-				ETag:           uploadRes.Part.ETag,
-				PartNumber:     uploadRes.Part.PartNumber,
-				ChecksumCRC32:  uploadRes.Part.ChecksumCRC32,
-				ChecksumCRC32C: uploadRes.Part.ChecksumCRC32C,
-				ChecksumSHA1:   uploadRes.Part.ChecksumSHA1,
-				ChecksumSHA256: uploadRes.Part.ChecksumSHA256,
+				ETag:              uploadRes.Part.ETag,
+				PartNumber:        uploadRes.Part.PartNumber,
+				ChecksumCRC32:     uploadRes.Part.ChecksumCRC32,
+				ChecksumCRC32C:    uploadRes.Part.ChecksumCRC32C,
+				ChecksumSHA1:      uploadRes.Part.ChecksumSHA1,
+				ChecksumSHA256:    uploadRes.Part.ChecksumSHA256,
+				ChecksumCRC64NVME: uploadRes.Part.ChecksumCRC64NVME,
 			})
 		}
 	}
@@ -271,17 +271,10 @@ func (c *Client) putObjectMultipartStreamFromReadAt(ctx context.Context, bucketN
 
 	opts = PutObjectOptions{
 		ServerSideEncryption: opts.ServerSideEncryption,
+		AutoChecksum:         opts.AutoChecksum,
 	}
 	if withChecksum {
-		// Add hash of hashes.
-		crc := crc32.New(crc32.MakeTable(crc32.Castagnoli))
-		for _, part := range complMultipartUpload.Parts {
-			cs, err := base64.StdEncoding.DecodeString(part.ChecksumCRC32C)
-			if err == nil {
-				crc.Write(cs)
-			}
-		}
-		opts.UserMetadata = map[string]string{"X-Amz-Checksum-Crc32c": base64.StdEncoding.EncodeToString(crc.Sum(nil))}
+		applyAutoChecksum(&opts, allParts)
 	}
 
 	uploadInfo, err := c.completeMultipartUpload(ctx, bucketName, objectName, uploadID, complMultipartUpload, opts)
@@ -304,11 +297,13 @@ func (c *Client) putObjectMultipartStreamOptionalChecksum(ctx context.Context, b
 		return UploadInfo{}, err
 	}
 
+	if opts.Checksum.IsSet() {
+		opts.AutoChecksum = opts.Checksum
+		opts.SendContentMd5 = false
+	}
+
 	if !opts.SendContentMd5 {
-		if opts.UserMetadata == nil {
-			opts.UserMetadata = make(map[string]string, 1)
-		}
-		opts.UserMetadata["X-Amz-Checksum-Algorithm"] = "CRC32C"
+		addAutoChecksumHeaders(&opts)
 	}
 
 	// Calculate the optimal parts info for a given size.
@@ -335,9 +330,8 @@ func (c *Client) putObjectMultipartStreamOptionalChecksum(ctx context.Context, b
 
 	// Create checksums
 	// CRC32C is ~50% faster on AMD64 @ 30GB/s
-	var crcBytes []byte
 	customHeader := make(http.Header)
-	crc := crc32.New(crc32.MakeTable(crc32.Castagnoli))
+	crc := opts.AutoChecksum.Hasher()
 	md5Hash := c.md5Hasher()
 	defer md5Hash.Close()
 
@@ -381,8 +375,7 @@ func (c *Client) putObjectMultipartStreamOptionalChecksum(ctx context.Context, b
 			crc.Reset()
 			crc.Write(buf[:length])
 			cSum := crc.Sum(nil)
-			customHeader.Set("x-amz-checksum-crc32c", base64.StdEncoding.EncodeToString(cSum))
-			crcBytes = append(crcBytes, cSum...)
+			customHeader.Set(opts.AutoChecksum.KeyCapitalized(), base64.StdEncoding.EncodeToString(cSum))
 		}
 
 		// Update progress reader appropriately to the latest offset
@@ -413,18 +406,21 @@ func (c *Client) putObjectMultipartStreamOptionalChecksum(ctx context.Context, b
 
 	// Loop over total uploaded parts to save them in
 	// Parts array before completing the multipart request.
+	allParts := make([]ObjectPart, 0, len(partsInfo))
 	for i := 1; i < partNumber; i++ {
 		part, ok := partsInfo[i]
 		if !ok {
 			return UploadInfo{}, errInvalidArgument(fmt.Sprintf("Missing part number %d", i))
 		}
+		allParts = append(allParts, part)
 		complMultipartUpload.Parts = append(complMultipartUpload.Parts, CompletePart{
-			ETag:           part.ETag,
-			PartNumber:     part.PartNumber,
-			ChecksumCRC32:  part.ChecksumCRC32,
-			ChecksumCRC32C: part.ChecksumCRC32C,
-			ChecksumSHA1:   part.ChecksumSHA1,
-			ChecksumSHA256: part.ChecksumSHA256,
+			ETag:              part.ETag,
+			PartNumber:        part.PartNumber,
+			ChecksumCRC32:     part.ChecksumCRC32,
+			ChecksumCRC32C:    part.ChecksumCRC32C,
+			ChecksumSHA1:      part.ChecksumSHA1,
+			ChecksumSHA256:    part.ChecksumSHA256,
+			ChecksumCRC64NVME: part.ChecksumCRC64NVME,
 		})
 	}
 
@@ -433,13 +429,9 @@ func (c *Client) putObjectMultipartStreamOptionalChecksum(ctx context.Context, b
 
 	opts = PutObjectOptions{
 		ServerSideEncryption: opts.ServerSideEncryption,
+		AutoChecksum:         opts.AutoChecksum,
 	}
-	if len(crcBytes) > 0 {
-		// Add hash of hashes.
-		crc.Reset()
-		crc.Write(crcBytes)
-		opts.UserMetadata = map[string]string{"X-Amz-Checksum-Crc32c": base64.StdEncoding.EncodeToString(crc.Sum(nil))}
-	}
+	applyAutoChecksum(&opts, allParts)
 	uploadInfo, err := c.completeMultipartUpload(ctx, bucketName, objectName, uploadID, complMultipartUpload, opts)
 	if err != nil {
 		return UploadInfo{}, err
@@ -462,12 +454,12 @@ func (c *Client) putObjectMultipartStreamParallel(ctx context.Context, bucketNam
 	if err = s3utils.CheckValidObjectName(objectName); err != nil {
 		return UploadInfo{}, err
 	}
-
+	if opts.Checksum.IsSet() {
+		opts.SendContentMd5 = false
+		opts.AutoChecksum = opts.Checksum
+	}
 	if !opts.SendContentMd5 {
-		if opts.UserMetadata == nil {
-			opts.UserMetadata = make(map[string]string, 1)
-		}
-		opts.UserMetadata["X-Amz-Checksum-Algorithm"] = "CRC32C"
+		addAutoChecksumHeaders(&opts)
 	}
 
 	// Cancel all when an error occurs.
@@ -499,8 +491,7 @@ func (c *Client) putObjectMultipartStreamParallel(ctx context.Context, bucketNam
 
 	// Create checksums
 	// CRC32C is ~50% faster on AMD64 @ 30GB/s
-	var crcBytes []byte
-	crc := crc32.New(crc32.MakeTable(crc32.Castagnoli))
+	crc := opts.AutoChecksum.Hasher()
 
 	// Total data read and written to server. should be equal to 'size' at the end of the call.
 	var totalUploadedSize int64
@@ -554,12 +545,11 @@ func (c *Client) putObjectMultipartStreamParallel(ctx context.Context, bucketNam
 		// Calculate md5sum.
 		customHeader := make(http.Header)
 		if !opts.SendContentMd5 {
-			// Add CRC32C instead.
+			// Add Checksum instead.
 			crc.Reset()
 			crc.Write(buf[:length])
 			cSum := crc.Sum(nil)
-			customHeader.Set("x-amz-checksum-crc32c", base64.StdEncoding.EncodeToString(cSum))
-			crcBytes = append(crcBytes, cSum...)
+			customHeader.Set(opts.AutoChecksum.Key(), base64.StdEncoding.EncodeToString(cSum))
 		}
 
 		wg.Add(1)
@@ -619,31 +609,33 @@ func (c *Client) putObjectMultipartStreamParallel(ctx context.Context, bucketNam
 
 	// Loop over total uploaded parts to save them in
 	// Parts array before completing the multipart request.
+	allParts := make([]ObjectPart, 0, len(partsInfo))
 	for i := 1; i < partNumber; i++ {
 		part, ok := partsInfo[i]
 		if !ok {
 			return UploadInfo{}, errInvalidArgument(fmt.Sprintf("Missing part number %d", i))
 		}
+		allParts = append(allParts, part)
 		complMultipartUpload.Parts = append(complMultipartUpload.Parts, CompletePart{
-			ETag:           part.ETag,
-			PartNumber:     part.PartNumber,
-			ChecksumCRC32:  part.ChecksumCRC32,
-			ChecksumCRC32C: part.ChecksumCRC32C,
-			ChecksumSHA1:   part.ChecksumSHA1,
-			ChecksumSHA256: part.ChecksumSHA256,
+			ETag:              part.ETag,
+			PartNumber:        part.PartNumber,
+			ChecksumCRC32:     part.ChecksumCRC32,
+			ChecksumCRC32C:    part.ChecksumCRC32C,
+			ChecksumSHA1:      part.ChecksumSHA1,
+			ChecksumSHA256:    part.ChecksumSHA256,
+			ChecksumCRC64NVME: part.ChecksumCRC64NVME,
 		})
 	}
 
 	// Sort all completed parts.
 	sort.Sort(completedParts(complMultipartUpload.Parts))
 
-	opts = PutObjectOptions{}
-	if len(crcBytes) > 0 {
-		// Add hash of hashes.
-		crc.Reset()
-		crc.Write(crcBytes)
-		opts.UserMetadata = map[string]string{"X-Amz-Checksum-Crc32c": base64.StdEncoding.EncodeToString(crc.Sum(nil))}
+	opts = PutObjectOptions{
+		ServerSideEncryption: opts.ServerSideEncryption,
+		AutoChecksum:         opts.AutoChecksum,
 	}
+	applyAutoChecksum(&opts, allParts)
+
 	uploadInfo, err := c.completeMultipartUpload(ctx, bucketName, objectName, uploadID, complMultipartUpload, opts)
 	if err != nil {
 		return UploadInfo{}, err
@@ -673,6 +665,9 @@ func (c *Client) putObject(ctx context.Context, bucketName, objectName string, r
 	if opts.SendContentMd5 && s3utils.IsGoogleEndpoint(*c.endpointURL) && size < 0 {
 		return UploadInfo{}, errInvalidArgument("MD5Sum cannot be calculated with size '-1'")
 	}
+	if opts.Checksum.IsSet() {
+		opts.SendContentMd5 = false
+	}
 
 	var readSeeker io.Seeker
 	if size > 0 {
@@ -742,17 +737,6 @@ func (c *Client) putObjectDo(ctx context.Context, bucketName, objectName string,
 	// Set headers.
 	customHeader := opts.Header()
 
-	// Add CRC when client supports it, MD5 is not set, not Google and we don't add SHA256 to chunks.
-	addCrc := c.trailingHeaderSupport && md5Base64 == "" && !s3utils.IsGoogleEndpoint(*c.endpointURL) && (opts.DisableContentSha256 || c.secure)
-
-	if addCrc {
-		// If user has added checksums, don't add them ourselves.
-		for k := range opts.UserMetadata {
-			if strings.HasPrefix(strings.ToLower(k), "x-amz-checksum-") {
-				addCrc = false
-			}
-		}
-	}
 	// Populate request metadata.
 	reqMetadata := requestMetadata{
 		bucketName:       bucketName,
@@ -763,8 +747,24 @@ func (c *Client) putObjectDo(ctx context.Context, bucketName, objectName string,
 		contentMD5Base64: md5Base64,
 		contentSHA256Hex: sha256Hex,
 		streamSha256:     !opts.DisableContentSha256,
-		addCrc:           addCrc,
 	}
+	// Add CRC when client supports it, MD5 is not set, not Google and we don't add SHA256 to chunks.
+	addCrc := c.trailingHeaderSupport && md5Base64 == "" && !s3utils.IsGoogleEndpoint(*c.endpointURL) && (opts.DisableContentSha256 || c.secure)
+	if opts.Checksum.IsSet() {
+		reqMetadata.addCrc = &opts.Checksum
+	} else if addCrc {
+		// If user has added checksums, don't add them ourselves.
+		for k := range opts.UserMetadata {
+			if strings.HasPrefix(strings.ToLower(k), "x-amz-checksum-") {
+				addCrc = false
+			}
+		}
+		if addCrc {
+			opts.AutoChecksum.SetDefault(ChecksumCRC32C)
+			reqMetadata.addCrc = &opts.AutoChecksum
+		}
+	}
+
 	if opts.Internal.SourceVersionID != "" {
 		if opts.Internal.SourceVersionID != nullVersionID {
 			if _, err := uuid.Parse(opts.Internal.SourceVersionID); err != nil {
@@ -801,9 +801,10 @@ func (c *Client) putObjectDo(ctx context.Context, bucketName, objectName string,
 		ExpirationRuleID: ruleID,
 
 		// Checksum values
-		ChecksumCRC32:  h.Get("x-amz-checksum-crc32"),
-		ChecksumCRC32C: h.Get("x-amz-checksum-crc32c"),
-		ChecksumSHA1:   h.Get("x-amz-checksum-sha1"),
-		ChecksumSHA256: h.Get("x-amz-checksum-sha256"),
+		ChecksumCRC32:     h.Get(ChecksumCRC32.Key()),
+		ChecksumCRC32C:    h.Get(ChecksumCRC32C.Key()),
+		ChecksumSHA1:      h.Get(ChecksumSHA1.Key()),
+		ChecksumSHA256:    h.Get(ChecksumSHA256.Key()),
+		ChecksumCRC64NVME: h.Get(ChecksumCRC64NVME.Key()),
 	}, nil
 }
diff --git a/vendor/github.com/minio/minio-go/v7/api-put-object.go b/vendor/github.com/minio/minio-go/v7/api-put-object.go
index 4dec6040..09817578 100644
--- a/vendor/github.com/minio/minio-go/v7/api-put-object.go
+++ b/vendor/github.com/minio/minio-go/v7/api-put-object.go
@@ -23,7 +23,6 @@ import (
 	"encoding/base64"
 	"errors"
 	"fmt"
-	"hash/crc32"
 	"io"
 	"net/http"
 	"sort"
@@ -46,6 +45,8 @@ const (
 	ReplicationStatusFailed ReplicationStatus = "FAILED"
 	// ReplicationStatusReplica indicates object is a replica of a source
 	ReplicationStatusReplica ReplicationStatus = "REPLICA"
+	// ReplicationStatusReplicaEdge indicates object is a replica of a edge source
+	ReplicationStatusReplicaEdge ReplicationStatus = "REPLICA-EDGE"
 )
 
 // Empty returns true if no replication status set.
@@ -90,6 +91,18 @@ type PutObjectOptions struct {
 	DisableContentSha256    bool
 	DisableMultipart        bool
 
+	// AutoChecksum is the type of checksum that will be added if no other checksum is added,
+	// like MD5 or SHA256 streaming checksum, and it is feasible for the upload type.
+	// If none is specified CRC32C is used, since it is generally the fastest.
+	AutoChecksum ChecksumType
+
+	// Checksum will force a checksum of the specific type.
+	// This requires that the client was created with "TrailingHeaders:true" option,
+	// and that the destination server supports it.
+	// Unavailable with V2 signatures & Google endpoints.
+	// This will disable content MD5 checksums if set.
+	Checksum ChecksumType
+
 	// ConcurrentStreamParts will create NumThreads buffers of PartSize bytes,
 	// fill them serially and upload them in parallel.
 	// This can be used for faster uploads on non-seekable or slow-to-seek input.
@@ -106,7 +119,11 @@ func (opts *PutObjectOptions) SetMatchETag(etag string) {
 	if opts.customHeaders == nil {
 		opts.customHeaders = http.Header{}
 	}
-	opts.customHeaders.Set("If-Match", "\""+etag+"\"")
+	if etag == "*" {
+		opts.customHeaders.Set("If-Match", "*")
+	} else {
+		opts.customHeaders.Set("If-Match", "\""+etag+"\"")
+	}
 }
 
 // SetMatchETagExcept if etag does not match while PUT MinIO returns an
@@ -116,7 +133,11 @@ func (opts *PutObjectOptions) SetMatchETagExcept(etag string) {
 	if opts.customHeaders == nil {
 		opts.customHeaders = http.Header{}
 	}
-	opts.customHeaders.Set("If-None-Match", "\""+etag+"\"")
+	if etag == "*" {
+		opts.customHeaders.Set("If-None-Match", "*")
+	} else {
+		opts.customHeaders.Set("If-None-Match", "\""+etag+"\"")
+	}
 }
 
 // getNumThreads - gets the number of threads to be used in the multipart
@@ -212,7 +233,7 @@ func (opts PutObjectOptions) Header() (header http.Header) {
 	}
 
 	for k, v := range opts.UserMetadata {
-		if isAmzHeader(k) || isStandardHeader(k) || isStorageClassHeader(k) || isValidReplicationEncryptionHeader(k) {
+		if isAmzHeader(k) || isStandardHeader(k) || isStorageClassHeader(k) || isMinioHeader(k) {
 			header.Set(k, v)
 		} else {
 			header.Set("x-amz-meta-"+k, v)
@@ -228,9 +249,9 @@ func (opts PutObjectOptions) Header() (header http.Header) {
 }
 
 // validate() checks if the UserMetadata map has standard headers or and raises an error if so.
-func (opts PutObjectOptions) validate() (err error) {
+func (opts PutObjectOptions) validate(c *Client) (err error) {
 	for k, v := range opts.UserMetadata {
-		if !httpguts.ValidHeaderFieldName(k) || isStandardHeader(k) || isSSEHeader(k) || isStorageClassHeader(k) || isValidReplicationEncryptionHeader(k) {
+		if !httpguts.ValidHeaderFieldName(k) || isStandardHeader(k) || isSSEHeader(k) || isStorageClassHeader(k) || isMinioHeader(k) {
 			return errInvalidArgument(k + " unsupported user defined metadata name")
 		}
 		if !httpguts.ValidHeaderFieldValue(v) {
@@ -243,6 +264,17 @@ func (opts PutObjectOptions) validate() (err error) {
 	if opts.LegalHold != "" && !opts.LegalHold.IsValid() {
 		return errInvalidArgument(opts.LegalHold.String() + " unsupported legal-hold status")
 	}
+	if opts.Checksum.IsSet() {
+		switch {
+		case !c.trailingHeaderSupport:
+			return errInvalidArgument("Checksum requires Client with TrailingHeaders enabled")
+		case c.overrideSignerType.IsV2():
+			return errInvalidArgument("Checksum cannot be used with v2 signatures")
+		case s3utils.IsGoogleEndpoint(*c.endpointURL):
+			return errInvalidArgument("Checksum cannot be used with GCS endpoints")
+		}
+	}
+
 	return nil
 }
 
@@ -279,7 +311,7 @@ func (c *Client) PutObject(ctx context.Context, bucketName, objectName string, r
 		return UploadInfo{}, errors.New("object size must be provided with disable multipart upload")
 	}
 
-	err = opts.validate()
+	err = opts.validate(c)
 	if err != nil {
 		return UploadInfo{}, err
 	}
@@ -292,6 +324,7 @@ func (c *Client) putObjectCommon(ctx context.Context, bucketName, objectName str
 	if size > int64(maxMultipartPutObjectSize) {
 		return UploadInfo{}, errEntityTooLarge(size, maxMultipartPutObjectSize, bucketName, objectName)
 	}
+	opts.AutoChecksum.SetDefault(ChecksumCRC32C)
 
 	// NOTE: Streaming signature is not supported by GCS.
 	if s3utils.IsGoogleEndpoint(*c.endpointURL) {
@@ -320,7 +353,7 @@ func (c *Client) putObjectCommon(ctx context.Context, bucketName, objectName str
 		return c.putObjectMultipartStreamNoLength(ctx, bucketName, objectName, reader, opts)
 	}
 
-	if size < int64(partSize) || opts.DisableMultipart {
+	if size <= int64(partSize) || opts.DisableMultipart {
 		return c.putObject(ctx, bucketName, objectName, reader, size, opts)
 	}
 
@@ -349,11 +382,12 @@ func (c *Client) putObjectMultipartStreamNoLength(ctx context.Context, bucketNam
 		return UploadInfo{}, err
 	}
 
+	if opts.Checksum.IsSet() {
+		opts.SendContentMd5 = false
+		opts.AutoChecksum = opts.Checksum
+	}
 	if !opts.SendContentMd5 {
-		if opts.UserMetadata == nil {
-			opts.UserMetadata = make(map[string]string, 1)
-		}
-		opts.UserMetadata["X-Amz-Checksum-Algorithm"] = "CRC32C"
+		addAutoChecksumHeaders(&opts)
 	}
 
 	// Initiate a new multipart upload.
@@ -380,9 +414,8 @@ func (c *Client) putObjectMultipartStreamNoLength(ctx context.Context, bucketNam
 
 	// Create checksums
 	// CRC32C is ~50% faster on AMD64 @ 30GB/s
-	var crcBytes []byte
 	customHeader := make(http.Header)
-	crc := crc32.New(crc32.MakeTable(crc32.Castagnoli))
+	crc := opts.AutoChecksum.Hasher()
 
 	for partNumber <= totalPartsCount {
 		length, rerr := readFull(reader, buf)
@@ -405,8 +438,7 @@ func (c *Client) putObjectMultipartStreamNoLength(ctx context.Context, bucketNam
 			crc.Reset()
 			crc.Write(buf[:length])
 			cSum := crc.Sum(nil)
-			customHeader.Set("x-amz-checksum-crc32c", base64.StdEncoding.EncodeToString(cSum))
-			crcBytes = append(crcBytes, cSum...)
+			customHeader.Set(opts.AutoChecksum.Key(), base64.StdEncoding.EncodeToString(cSum))
 		}
 
 		// Update progress reader appropriately to the latest offset
@@ -438,31 +470,33 @@ func (c *Client) putObjectMultipartStreamNoLength(ctx context.Context, bucketNam
 
 	// Loop over total uploaded parts to save them in
 	// Parts array before completing the multipart request.
+	allParts := make([]ObjectPart, 0, len(partsInfo))
 	for i := 1; i < partNumber; i++ {
 		part, ok := partsInfo[i]
 		if !ok {
 			return UploadInfo{}, errInvalidArgument(fmt.Sprintf("Missing part number %d", i))
 		}
+		allParts = append(allParts, part)
 		complMultipartUpload.Parts = append(complMultipartUpload.Parts, CompletePart{
-			ETag:           part.ETag,
-			PartNumber:     part.PartNumber,
-			ChecksumCRC32:  part.ChecksumCRC32,
-			ChecksumCRC32C: part.ChecksumCRC32C,
-			ChecksumSHA1:   part.ChecksumSHA1,
-			ChecksumSHA256: part.ChecksumSHA256,
+			ETag:              part.ETag,
+			PartNumber:        part.PartNumber,
+			ChecksumCRC32:     part.ChecksumCRC32,
+			ChecksumCRC32C:    part.ChecksumCRC32C,
+			ChecksumSHA1:      part.ChecksumSHA1,
+			ChecksumSHA256:    part.ChecksumSHA256,
+			ChecksumCRC64NVME: part.ChecksumCRC64NVME,
 		})
 	}
 
 	// Sort all completed parts.
 	sort.Sort(completedParts(complMultipartUpload.Parts))
 
-	opts = PutObjectOptions{}
-	if len(crcBytes) > 0 {
-		// Add hash of hashes.
-		crc.Reset()
-		crc.Write(crcBytes)
-		opts.UserMetadata = map[string]string{"X-Amz-Checksum-Crc32c": base64.StdEncoding.EncodeToString(crc.Sum(nil))}
+	opts = PutObjectOptions{
+		ServerSideEncryption: opts.ServerSideEncryption,
+		AutoChecksum:         opts.AutoChecksum,
 	}
+	applyAutoChecksum(&opts, allParts)
+
 	uploadInfo, err := c.completeMultipartUpload(ctx, bucketName, objectName, uploadID, complMultipartUpload, opts)
 	if err != nil {
 		return UploadInfo{}, err
diff --git a/vendor/github.com/minio/minio-go/v7/api-putobject-snowball.go b/vendor/github.com/minio/minio-go/v7/api-putobject-snowball.go
index eb4da414..6b6559bf 100644
--- a/vendor/github.com/minio/minio-go/v7/api-putobject-snowball.go
+++ b/vendor/github.com/minio/minio-go/v7/api-putobject-snowball.go
@@ -107,7 +107,7 @@ type readSeekCloser interface {
 // Total size should be < 5TB.
 // This function blocks until 'objs' is closed and the content has been uploaded.
 func (c Client) PutObjectsSnowball(ctx context.Context, bucketName string, opts SnowballOptions, objs <-chan SnowballObject) (err error) {
-	err = opts.Opts.validate()
+	err = opts.Opts.validate(&c)
 	if err != nil {
 		return err
 	}
diff --git a/vendor/github.com/minio/minio-go/v7/api-s3-datatypes.go b/vendor/github.com/minio/minio-go/v7/api-s3-datatypes.go
index 1527b746..5e015fb8 100644
--- a/vendor/github.com/minio/minio-go/v7/api-s3-datatypes.go
+++ b/vendor/github.com/minio/minio-go/v7/api-s3-datatypes.go
@@ -18,6 +18,7 @@
 package minio
 
 import (
+	"encoding/base64"
 	"encoding/xml"
 	"errors"
 	"io"
@@ -276,10 +277,45 @@ type ObjectPart struct {
 	Size int64
 
 	// Checksum values of each part.
-	ChecksumCRC32  string
-	ChecksumCRC32C string
-	ChecksumSHA1   string
-	ChecksumSHA256 string
+	ChecksumCRC32     string
+	ChecksumCRC32C    string
+	ChecksumSHA1      string
+	ChecksumSHA256    string
+	ChecksumCRC64NVME string
+}
+
+// Checksum will return the checksum for the given type.
+// Will return the empty string if not set.
+func (c ObjectPart) Checksum(t ChecksumType) string {
+	switch {
+	case t.Is(ChecksumCRC32C):
+		return c.ChecksumCRC32C
+	case t.Is(ChecksumCRC32):
+		return c.ChecksumCRC32
+	case t.Is(ChecksumSHA1):
+		return c.ChecksumSHA1
+	case t.Is(ChecksumSHA256):
+		return c.ChecksumSHA256
+	case t.Is(ChecksumCRC64NVME):
+		return c.ChecksumCRC64NVME
+	}
+	return ""
+}
+
+// ChecksumRaw returns the decoded checksum from the part.
+func (c ObjectPart) ChecksumRaw(t ChecksumType) ([]byte, error) {
+	b64 := c.Checksum(t)
+	if b64 == "" {
+		return nil, errors.New("no checksum set")
+	}
+	decoded, err := base64.StdEncoding.DecodeString(b64)
+	if err != nil {
+		return nil, err
+	}
+	if len(decoded) != t.RawByteLen() {
+		return nil, errors.New("checksum length mismatch")
+	}
+	return decoded, nil
 }
 
 // ListObjectPartsResult container for ListObjectParts response.
@@ -296,6 +332,12 @@ type ListObjectPartsResult struct {
 	NextPartNumberMarker int
 	MaxParts             int
 
+	// ChecksumAlgorithm will be CRC32, CRC32C, etc.
+	ChecksumAlgorithm string
+
+	// ChecksumType is FULL_OBJECT or COMPOSITE (assume COMPOSITE when unset)
+	ChecksumType string
+
 	// Indicates whether the returned list of parts is truncated.
 	IsTruncated bool
 	ObjectParts []ObjectPart `xml:"Part"`
@@ -320,10 +362,11 @@ type completeMultipartUploadResult struct {
 	ETag     string
 
 	// Checksum values, hash of hashes of parts.
-	ChecksumCRC32  string
-	ChecksumCRC32C string
-	ChecksumSHA1   string
-	ChecksumSHA256 string
+	ChecksumCRC32     string
+	ChecksumCRC32C    string
+	ChecksumSHA1      string
+	ChecksumSHA256    string
+	ChecksumCRC64NVME string
 }
 
 // CompletePart sub container lists individual part numbers and their
@@ -334,10 +377,29 @@ type CompletePart struct {
 	ETag       string
 
 	// Checksum values
-	ChecksumCRC32  string `xml:"ChecksumCRC32,omitempty"`
-	ChecksumCRC32C string `xml:"ChecksumCRC32C,omitempty"`
-	ChecksumSHA1   string `xml:"ChecksumSHA1,omitempty"`
-	ChecksumSHA256 string `xml:"ChecksumSHA256,omitempty"`
+	ChecksumCRC32     string `xml:"ChecksumCRC32,omitempty"`
+	ChecksumCRC32C    string `xml:"ChecksumCRC32C,omitempty"`
+	ChecksumSHA1      string `xml:"ChecksumSHA1,omitempty"`
+	ChecksumSHA256    string `xml:"ChecksumSHA256,omitempty"`
+	ChecksumCRC64NVME string `xml:",omitempty"`
+}
+
+// Checksum will return the checksum for the given type.
+// Will return the empty string if not set.
+func (c CompletePart) Checksum(t ChecksumType) string {
+	switch {
+	case t.Is(ChecksumCRC32C):
+		return c.ChecksumCRC32C
+	case t.Is(ChecksumCRC32):
+		return c.ChecksumCRC32
+	case t.Is(ChecksumSHA1):
+		return c.ChecksumSHA1
+	case t.Is(ChecksumSHA256):
+		return c.ChecksumSHA256
+	case t.Is(ChecksumCRC64NVME):
+		return c.ChecksumCRC64NVME
+	}
+	return ""
 }
 
 // completeMultipartUpload container for completing multipart upload.
diff --git a/vendor/github.com/minio/minio-go/v7/api-stat.go b/vendor/github.com/minio/minio-go/v7/api-stat.go
index b043dc40..11455beb 100644
--- a/vendor/github.com/minio/minio-go/v7/api-stat.go
+++ b/vendor/github.com/minio/minio-go/v7/api-stat.go
@@ -61,10 +61,18 @@ func (c *Client) BucketExists(ctx context.Context, bucketName string) (bool, err
 func (c *Client) StatObject(ctx context.Context, bucketName, objectName string, opts StatObjectOptions) (ObjectInfo, error) {
 	// Input validation.
 	if err := s3utils.CheckValidBucketName(bucketName); err != nil {
-		return ObjectInfo{}, err
+		return ObjectInfo{}, ErrorResponse{
+			StatusCode: http.StatusBadRequest,
+			Code:       "InvalidBucketName",
+			Message:    err.Error(),
+		}
 	}
 	if err := s3utils.CheckValidObjectName(objectName); err != nil {
-		return ObjectInfo{}, err
+		return ObjectInfo{}, ErrorResponse{
+			StatusCode: http.StatusBadRequest,
+			Code:       "XMinioInvalidObjectName",
+			Message:    err.Error(),
+		}
 	}
 	headers := opts.Header()
 	if opts.Internal.ReplicationDeleteMarker {
diff --git a/vendor/github.com/minio/minio-go/v7/api.go b/vendor/github.com/minio/minio-go/v7/api.go
index 930e082a..83c003e4 100644
--- a/vendor/github.com/minio/minio-go/v7/api.go
+++ b/vendor/github.com/minio/minio-go/v7/api.go
@@ -23,7 +23,6 @@ import (
 	"encoding/base64"
 	"errors"
 	"fmt"
-	"hash/crc32"
 	"io"
 	"math/rand"
 	"net"
@@ -100,6 +99,7 @@ type Client struct {
 	healthStatus int32
 
 	trailingHeaderSupport bool
+	maxRetries            int
 }
 
 // Options for New method
@@ -124,12 +124,16 @@ type Options struct {
 	// Custom hash routines. Leave nil to use standard.
 	CustomMD5    func() md5simd.Hasher
 	CustomSHA256 func() md5simd.Hasher
+
+	// Number of times a request is retried. Defaults to 10 retries if this option is not configured.
+	// Set to 1 to disable retries.
+	MaxRetries int
 }
 
 // Global constants.
 const (
 	libraryName    = "minio-go"
-	libraryVersion = "v7.0.69"
+	libraryVersion = "v7.0.82"
 )
 
 // User Agent should always following the below style.
@@ -279,6 +283,11 @@ func privateNew(endpoint string, opts *Options) (*Client, error) {
 	// healthcheck is not initialized
 	clnt.healthStatus = unknown
 
+	clnt.maxRetries = MaxRetry
+	if opts.MaxRetries > 0 {
+		clnt.maxRetries = opts.MaxRetries
+	}
+
 	// Return.
 	return clnt, nil
 }
@@ -471,7 +480,7 @@ type requestMetadata struct {
 	contentMD5Base64 string // carries base64 encoded md5sum
 	contentSHA256Hex string // carries hex encoded sha256sum
 	streamSha256     bool
-	addCrc           bool
+	addCrc           *ChecksumType
 	trailer          http.Header // (http.Request).Trailer. Requires v4 signature.
 }
 
@@ -591,9 +600,9 @@ func (c *Client) executeMethod(ctx context.Context, method string, metadata requ
 		return nil, errors.New(c.endpointURL.String() + " is offline.")
 	}
 
-	var retryable bool       // Indicates if request can be retried.
-	var bodySeeker io.Seeker // Extracted seeker from io.Reader.
-	reqRetry := MaxRetry     // Indicates how many times we can retry the request
+	var retryable bool          // Indicates if request can be retried.
+	var bodySeeker io.Seeker    // Extracted seeker from io.Reader.
+	var reqRetry = c.maxRetries // Indicates how many times we can retry the request
 
 	if metadata.contentBody != nil {
 		// Check if body is seekable then it is retryable.
@@ -616,6 +625,18 @@ func (c *Client) executeMethod(ctx context.Context, method string, metadata requ
 		}
 	}
 
+	if metadata.addCrc != nil && metadata.contentLength > 0 {
+		if metadata.trailer == nil {
+			metadata.trailer = make(http.Header, 1)
+		}
+		crc := metadata.addCrc.Hasher()
+		metadata.contentBody = newHashReaderWrapper(metadata.contentBody, crc, func(hash []byte) {
+			// Update trailer when done.
+			metadata.trailer.Set(metadata.addCrc.Key(), base64.StdEncoding.EncodeToString(hash))
+		})
+		metadata.trailer.Set(metadata.addCrc.Key(), base64.StdEncoding.EncodeToString(crc.Sum(nil)))
+	}
+
 	// Create cancel context to control 'newRetryTimer' go routine.
 	retryCtx, cancel := context.WithCancel(ctx)
 
@@ -635,17 +656,6 @@ func (c *Client) executeMethod(ctx context.Context, method string, metadata requ
 			}
 		}
 
-		if metadata.addCrc {
-			if metadata.trailer == nil {
-				metadata.trailer = make(http.Header, 1)
-			}
-			crc := crc32.New(crc32.MakeTable(crc32.Castagnoli))
-			metadata.contentBody = newHashReaderWrapper(metadata.contentBody, crc, func(hash []byte) {
-				// Update trailer when done.
-				metadata.trailer.Set("x-amz-checksum-crc32c", base64.StdEncoding.EncodeToString(hash))
-			})
-			metadata.trailer.Set("x-amz-checksum-crc32c", base64.StdEncoding.EncodeToString(crc.Sum(nil)))
-		}
 		// Instantiate a new request.
 		var req *http.Request
 		req, err = c.newRequest(ctx, method, metadata)
@@ -661,7 +671,7 @@ func (c *Client) executeMethod(ctx context.Context, method string, metadata requ
 		// Initiate the request.
 		res, err = c.do(req)
 		if err != nil {
-			if isRequestErrorRetryable(err) {
+			if isRequestErrorRetryable(ctx, err) {
 				// Retry the request
 				continue
 			}
diff --git a/vendor/github.com/minio/minio-go/v7/checksum.go b/vendor/github.com/minio/minio-go/v7/checksum.go
index a1f6f434..8e4c27ce 100644
--- a/vendor/github.com/minio/minio-go/v7/checksum.go
+++ b/vendor/github.com/minio/minio-go/v7/checksum.go
@@ -21,10 +21,15 @@ import (
 	"crypto/sha1"
 	"crypto/sha256"
 	"encoding/base64"
+	"encoding/binary"
+	"errors"
 	"hash"
 	"hash/crc32"
+	"hash/crc64"
 	"io"
 	"math/bits"
+	"net/http"
+	"sort"
 )
 
 // ChecksumType contains information about the checksum type.
@@ -40,23 +45,41 @@ const (
 	ChecksumCRC32
 	// ChecksumCRC32C indicates a CRC32 checksum with Castagnoli table.
 	ChecksumCRC32C
+	// ChecksumCRC64NVME indicates CRC64 with 0xad93d23594c93659 polynomial.
+	ChecksumCRC64NVME
 
 	// Keep after all valid checksums
 	checksumLast
 
+	// ChecksumFullObject is a modifier that can be used on CRC32 and CRC32C
+	// to indicate full object checksums.
+	ChecksumFullObject
+
 	// checksumMask is a mask for valid checksum types.
 	checksumMask = checksumLast - 1
 
 	// ChecksumNone indicates no checksum.
 	ChecksumNone ChecksumType = 0
 
-	amzChecksumAlgo   = "x-amz-checksum-algorithm"
-	amzChecksumCRC32  = "x-amz-checksum-crc32"
-	amzChecksumCRC32C = "x-amz-checksum-crc32c"
-	amzChecksumSHA1   = "x-amz-checksum-sha1"
-	amzChecksumSHA256 = "x-amz-checksum-sha256"
+	// ChecksumFullObjectCRC32 indicates full object CRC32
+	ChecksumFullObjectCRC32 = ChecksumCRC32 | ChecksumFullObject
+
+	// ChecksumFullObjectCRC32C indicates full object CRC32C
+	ChecksumFullObjectCRC32C = ChecksumCRC32C | ChecksumFullObject
+
+	amzChecksumAlgo      = "x-amz-checksum-algorithm"
+	amzChecksumCRC32     = "x-amz-checksum-crc32"
+	amzChecksumCRC32C    = "x-amz-checksum-crc32c"
+	amzChecksumSHA1      = "x-amz-checksum-sha1"
+	amzChecksumSHA256    = "x-amz-checksum-sha256"
+	amzChecksumCRC64NVME = "x-amz-checksum-crc64nvme"
 )
 
+// Base returns the base type with modifier bits (anything outside checksumMask) cleared.
+func (c ChecksumType) Base() ChecksumType {
+	return c & checksumMask
+}
+
 // Is returns if c is all of t.
 func (c ChecksumType) Is(t ChecksumType) bool {
 	return c&t == t
@@ -74,10 +97,44 @@ func (c ChecksumType) Key() string {
 		return amzChecksumSHA1
 	case ChecksumSHA256:
 		return amzChecksumSHA256
+	case ChecksumCRC64NVME:
+		return amzChecksumCRC64NVME
 	}
 	return ""
 }
 
+// CanComposite reports whether the base type can be used for composite multipart checksums on AWS.
+func (c ChecksumType) CanComposite() bool {
+	switch c & checksumMask { // modifier bits are ignored
+	case ChecksumSHA256, ChecksumSHA1, ChecksumCRC32, ChecksumCRC32C:
+		return true
+	}
+	return false
+}
+
+// CanMergeCRC reports whether per-part checksums of this type can be merged into a full object checksum (CRC types only).
+func (c ChecksumType) CanMergeCRC() bool {
+	switch c & checksumMask { // modifier bits are ignored
+	case ChecksumCRC32, ChecksumCRC32C, ChecksumCRC64NVME:
+		return true
+	}
+	return false
+}
+
+// FullObjectRequested reports whether c requests a full object checksum: CRC32/CRC32C with ChecksumFullObject, or CRC64NVME.
+func (c ChecksumType) FullObjectRequested() bool {
+	switch c & (ChecksumFullObject | checksumMask) {
+	case ChecksumFullObjectCRC32C, ChecksumFullObjectCRC32, ChecksumCRC64NVME, ChecksumCRC64NVME | ChecksumFullObject:
+		return true // CRC64NVME counts as a full object request with or without the explicit modifier
+	}
+	return false
+}
+
+// KeyCapitalized returns Key() in canonical HTTP header form (see http.CanonicalHeaderKey).
+func (c ChecksumType) KeyCapitalized() string {
+	return http.CanonicalHeaderKey(c.Key())
+}
+
 // RawByteLen returns the size of the un-encoded checksum.
 func (c ChecksumType) RawByteLen() int {
 	switch c & checksumMask {
@@ -87,10 +144,17 @@ func (c ChecksumType) RawByteLen() int {
 		return sha1.Size
 	case ChecksumSHA256:
 		return sha256.Size
+	case ChecksumCRC64NVME:
+		return crc64.Size
 	}
 	return 0
 }
 
+const crc64NVMEPolynomial = 0xad93d23594c93659 // polynomial used by ChecksumCRC64NVME
+
+// crc64Table is built from the bit-reversed polynomial because crc64 uses reversed polynomials.
+var crc64Table = crc64.MakeTable(bits.Reverse64(crc64NVMEPolynomial))
+
 // Hasher returns a hasher corresponding to the checksum type.
 // Returns nil if no checksum.
 func (c ChecksumType) Hasher() hash.Hash {
@@ -103,13 +167,32 @@ func (c ChecksumType) Hasher() hash.Hash {
 		return sha1.New()
 	case ChecksumSHA256:
 		return sha256.New()
+	case ChecksumCRC64NVME:
+		return crc64.New(crc64Table)
 	}
 	return nil
 }
 
 // IsSet returns whether the type is valid and known.
 func (c ChecksumType) IsSet() bool {
-	return bits.OnesCount32(uint32(c)) == 1
+	return bits.OnesCount32(uint32(c&checksumMask)) == 1
+}
+
+// SetDefault sets *c to t unless *c already holds a valid, known type (see IsSet).
+func (c *ChecksumType) SetDefault(t ChecksumType) {
+	if !c.IsSet() {
+		*c = t // the previous value, including any modifier bits, is replaced
+	}
+}
+
+// EncodeToString returns the base64-encoded hash of b, or "" when c is not a set checksum type.
+func (c ChecksumType) EncodeToString(b []byte) string {
+	if !c.IsSet() {
+		return ""
+	}
+	h := c.Hasher()
+	h.Write(b) // hash.Hash documents that Write never returns an error
+	return base64.StdEncoding.EncodeToString(h.Sum(nil))
 }
 
 // String returns the type as a string.
@@ -127,6 +210,8 @@ func (c ChecksumType) String() string {
 		return "SHA256"
 	case ChecksumNone:
 		return ""
+	case ChecksumCRC64NVME:
+		return "CRC64NVME"
 	}
 	return "<invalid>"
 }
@@ -208,3 +293,129 @@ func (c Checksum) Raw() []byte {
 	}
 	return c.r
 }
+
+// CompositeChecksum returns the checksum of the concatenated raw part checksums, ordered by part number.
+func (c ChecksumType) CompositeChecksum(p []ObjectPart) (*Checksum, error) {
+	if !c.CanComposite() {
+		return nil, errors.New("cannot do composite checksum")
+	}
+	sort.Slice(p, func(i, j int) bool { // NOTE: reorders the caller's slice in place
+		return p[i].PartNumber < p[j].PartNumber
+	})
+	c = c.Base() // drop modifier bits before reading the per-part checksums
+	crcBytes := make([]byte, 0, len(p)*c.RawByteLen())
+	for _, part := range p {
+		pCrc, err := part.ChecksumRaw(c)
+		if err != nil {
+			return nil, err
+		}
+		crcBytes = append(crcBytes, pCrc...)
+	}
+	h := c.Hasher()
+	h.Write(crcBytes) // hash the concatenation of all part checksums
+	return &Checksum{Type: c, r: h.Sum(nil)}, nil
+}
+
+// FullObjectChecksum merges the per-part CRCs of p, in part-number order, into one full object checksum.
+func (c ChecksumType) FullObjectChecksum(p []ObjectPart) (*Checksum, error) {
+	if !c.CanMergeCRC() {
+		return nil, errors.New("cannot merge this checksum type")
+	}
+	c = c.Base() // drop modifier bits so the switches below can compare against plain types
+	sort.Slice(p, func(i, j int) bool { // NOTE: reorders the caller's slice in place
+		return p[i].PartNumber < p[j].PartNumber
+	})
+
+	switch len(p) {
+	case 0:
+		return nil, errors.New("no parts given")
+	case 1:
+		check, err := p[0].ChecksumRaw(c) // a single part's checksum is already the full object checksum
+		if err != nil {
+			return nil, err
+		}
+		return &Checksum{
+			Type: c,
+			r:    check,
+		}, nil
+	}
+	var merged uint32
+	var merged64 uint64
+	first, err := p[0].ChecksumRaw(c)
+	if err != nil {
+		return nil, err
+	}
+	sz := p[0].Size // running total of part sizes; not read again within this function
+	switch c {
+	case ChecksumCRC32, ChecksumCRC32C:
+		merged = binary.BigEndian.Uint32(first) // presumably ChecksumRaw guarantees RawByteLen bytes; verify
+	case ChecksumCRC64NVME:
+		merged64 = binary.BigEndian.Uint64(first)
+	}
+
+	poly32 := uint32(crc32.IEEE)
+	if c.Is(ChecksumCRC32C) {
+		poly32 = crc32.Castagnoli
+	}
+	for _, part := range p[1:] {
+		if part.Size == 0 { // empty parts are skipped without reading their checksum
+			continue
+		}
+		sz += part.Size
+		pCrc, err := part.ChecksumRaw(c)
+		if err != nil {
+			return nil, err
+		}
+		switch c {
+		case ChecksumCRC32, ChecksumCRC32C:
+			merged = crc32Combine(poly32, merged, binary.BigEndian.Uint32(pCrc), part.Size)
+		case ChecksumCRC64NVME:
+			merged64 = crc64Combine(bits.Reverse64(crc64NVMEPolynomial), merged64, binary.BigEndian.Uint64(pCrc), part.Size)
+		}
+	}
+	var tmp [8]byte // scratch buffer for the big-endian encoding of the merged value
+	switch c {
+	case ChecksumCRC32, ChecksumCRC32C:
+		binary.BigEndian.PutUint32(tmp[:], merged)
+		return &Checksum{
+			Type: c,
+			r:    tmp[:4],
+		}, nil
+	case ChecksumCRC64NVME:
+		binary.BigEndian.PutUint64(tmp[:], merged64)
+		return &Checksum{
+			Type: c,
+			r:    tmp[:8],
+		}, nil
+	default:
+		return nil, errors.New("unknown checksum type")
+	}
+}
+
+func addAutoChecksumHeaders(opts *PutObjectOptions) { // records opts.AutoChecksum in opts.UserMetadata
+	if opts.UserMetadata == nil {
+		opts.UserMetadata = make(map[string]string, 1)
+	}
+	opts.UserMetadata["X-Amz-Checksum-Algorithm"] = opts.AutoChecksum.String()
+	if opts.AutoChecksum.FullObjectRequested() {
+		opts.UserMetadata["X-Amz-Checksum-Type"] = "FULL_OBJECT" // only set for full object checksums
+	}
+}
+
+func applyAutoChecksum(opts *PutObjectOptions, allParts []ObjectPart) { // stores the checksum over allParts in opts.UserMetadata
+	if !opts.AutoChecksum.IsSet() {
+		return
+	}
+	if opts.AutoChecksum.CanComposite() && !opts.AutoChecksum.Is(ChecksumFullObject) {
+		// Add composite hash of hashes.
+		crc, err := opts.AutoChecksum.CompositeChecksum(allParts)
+		if err == nil { // on error the metadata is left untouched and the error is dropped
+			opts.UserMetadata = map[string]string{opts.AutoChecksum.Key(): crc.Encoded()} // replaces any existing metadata
+		}
+	} else if opts.AutoChecksum.CanMergeCRC() {
+		crc, err := opts.AutoChecksum.FullObjectChecksum(allParts)
+		if err == nil { // on error the metadata is left untouched and the error is dropped
+			opts.UserMetadata = map[string]string{opts.AutoChecksum.KeyCapitalized(): crc.Encoded(), "X-Amz-Checksum-Type": "FULL_OBJECT"}
+		}
+	}
+}
diff --git a/vendor/github.com/minio/minio-go/v7/core.go b/vendor/github.com/minio/minio-go/v7/core.go
index 132ea702..99b99db9 100644
--- a/vendor/github.com/minio/minio-go/v7/core.go
+++ b/vendor/github.com/minio/minio-go/v7/core.go
@@ -91,6 +91,7 @@ type PutObjectPartOptions struct {
 	Md5Base64, Sha256Hex  string
 	SSE                   encrypt.ServerSide
 	CustomHeader, Trailer http.Header
+	DisableContentSha256  bool
 }
 
 // PutObjectPart - Upload an object part.
@@ -107,7 +108,7 @@ func (c Core) PutObjectPart(ctx context.Context, bucket, object, uploadID string
 		sha256Hex:    opts.Sha256Hex,
 		size:         size,
 		sse:          opts.SSE,
-		streamSha256: true,
+		streamSha256: !opts.DisableContentSha256,
 		customHeader: opts.CustomHeader,
 		trailer:      opts.Trailer,
 	}
diff --git a/vendor/github.com/minio/minio-go/v7/functional_tests.go b/vendor/github.com/minio/minio-go/v7/functional_tests.go
index de17cdc6..33e87e6e 100644
--- a/vendor/github.com/minio/minio-go/v7/functional_tests.go
+++ b/vendor/github.com/minio/minio-go/v7/functional_tests.go
@@ -24,7 +24,7 @@ import (
 	"archive/zip"
 	"bytes"
 	"context"
-	"crypto/sha1"
+	"crypto/sha256"
 	"encoding/base64"
 	"errors"
 	"fmt"
@@ -49,9 +49,9 @@ import (
 
 	"github.com/dustin/go-humanize"
 	"github.com/google/uuid"
-	"github.com/minio/sha256-simd"
 
 	"github.com/minio/minio-go/v7"
+	"github.com/minio/minio-go/v7/pkg/cors"
 	"github.com/minio/minio-go/v7/pkg/credentials"
 	"github.com/minio/minio-go/v7/pkg/encrypt"
 	"github.com/minio/minio-go/v7/pkg/notification"
@@ -83,7 +83,7 @@ func createHTTPTransport() (transport *http.Transport) {
 		return nil
 	}
 
-	if mustParseBool(os.Getenv(skipCERTValidation)) {
+	if mustParseBool(os.Getenv(enableHTTPS)) && mustParseBool(os.Getenv(skipCERTValidation)) {
 		transport.TLSClientConfig.InsecureSkipVerify = true
 	}
 
@@ -160,12 +160,12 @@ func logError(testName, function string, args map[string]interface{}, startTime
 	} else {
 		logFailure(testName, function, args, startTime, alert, message, err)
 		if !isRunOnFail() {
-			panic(err)
+			panic(fmt.Sprintf("Test failed with message: %s, err: %v", message, err))
 		}
 	}
 }
 
-// log failed test runs
+// logFailure logs a failed test run. Do not call it directly; use logError instead, as that correctly stops the test run.
 func logFailure(testName, function string, args map[string]interface{}, startTime time.Time, alert, message string, err error) {
 	l := baseLogger(testName, function, args, startTime).With(
 		"status", "FAIL",
@@ -393,6 +393,42 @@ func getFuncNameLoc(caller int) string {
 	return strings.TrimPrefix(runtime.FuncForPC(pc).Name(), "main.")
 }
 
+type ClientConfig struct {
+	// Options read by NewClient; the zero value gives V4 credentials with tracing and trailing headers off.
+	TraceOn         bool // Turn on tracing of HTTP requests and responses to stderr
+	CredsV2         bool // Use V2 credentials if true, otherwise use V4
+	TrailingHeaders bool // Send trailing headers in requests
+}
+
+func NewClient(config ClientConfig) (*minio.Client, error) {
+	// Build the client from environment variables; config picks the credential version, trailing headers and tracing.
+	var creds *credentials.Credentials
+	if config.CredsV2 {
+		creds = credentials.NewStaticV2(os.Getenv(accessKey), os.Getenv(secretKey), "")
+	} else {
+		creds = credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), "")
+	}
+	opts := &minio.Options{
+		Creds:           creds,
+		Transport:       createHTTPTransport(),
+		Secure:          mustParseBool(os.Getenv(enableHTTPS)),
+		TrailingHeaders: config.TrailingHeaders,
+	}
+	client, err := minio.New(os.Getenv(serverEndpoint), opts)
+	if err != nil {
+		return nil, err
+	}
+
+	if config.TraceOn {
+		client.TraceOn(os.Stderr)
+	}
+
+	// Set the user agent reported by the functional tests.
+	client.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
+
+	return client, nil
+}
+
 // Tests bucket re-create errors.
 func testMakeBucketError() {
 	region := "eu-central-1"
@@ -407,27 +443,12 @@ func testMakeBucketError() {
 		"region":     region,
 	}
 
-	// Seed random based on current time.
-	rand.Seed(time.Now().Unix())
-
-	// Instantiate new minio client object.
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-			Transport: createHTTPTransport(),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO client creation failed", err)
 		return
 	}
 
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 	args["bucketName"] = bucketName
@@ -462,20 +483,12 @@ func testMetadataSizeLimit() {
 		"objectName":        "",
 		"opts.UserMetadata": "",
 	}
-	rand.Seed(startTime.Unix())
 
-	// Instantiate new minio client object.
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-			Transport: createHTTPTransport(),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO client creation failed", err)
 		return
 	}
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
 
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 	args["bucketName"] = bucketName
@@ -531,27 +544,12 @@ func testMakeBucketRegions() {
 		"region":     region,
 	}
 
-	// Seed random based on current time.
-	rand.Seed(time.Now().Unix())
-
-	// Instantiate new minio client object.
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO client creation failed", err)
 		return
 	}
 
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 	args["bucketName"] = bucketName
@@ -598,27 +596,12 @@ func testPutObjectReadAt() {
 		"opts":       "objectContentType",
 	}
 
-	// Seed random based on current time.
-	rand.Seed(time.Now().Unix())
-
-	// Instantiate new minio client object.
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO client object creation failed", err)
 		return
 	}
 
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 	args["bucketName"] = bucketName
@@ -697,27 +680,12 @@ func testListObjectVersions() {
 		"recursive":  "",
 	}
 
-	// Seed random based on current time.
-	rand.Seed(time.Now().Unix())
-
-	// Instantiate new minio client object.
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO client object creation failed", err)
 		return
 	}
 
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 	args["bucketName"] = bucketName
@@ -817,27 +785,12 @@ func testStatObjectWithVersioning() {
 	function := "StatObject"
 	args := map[string]interface{}{}
 
-	// Seed random based on current time.
-	rand.Seed(time.Now().Unix())
-
-	// Instantiate new minio client object.
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO client object creation failed", err)
 		return
 	}
 
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 	args["bucketName"] = bucketName
@@ -935,27 +888,12 @@ func testGetObjectWithVersioning() {
 	function := "GetObject()"
 	args := map[string]interface{}{}
 
-	// Seed random based on current time.
-	rand.Seed(time.Now().Unix())
-
-	// Instantiate new minio client object.
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO client object creation failed", err)
 		return
 	}
 
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 	args["bucketName"] = bucketName
@@ -1075,27 +1013,12 @@ func testPutObjectWithVersioning() {
 	function := "GetObject()"
 	args := map[string]interface{}{}
 
-	// Seed random based on current time.
-	rand.Seed(time.Now().Unix())
-
-	// Instantiate new minio client object.
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO client object creation failed", err)
 		return
 	}
 
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 	args["bucketName"] = bucketName
@@ -1216,33 +1139,126 @@ func testPutObjectWithVersioning() {
 	logSuccess(testName, function, args, startTime)
 }
 
-func testCopyObjectWithVersioning() {
+func testListMultipartUpload() {
 	// initialize logging params
 	startTime := time.Now()
 	testName := getFuncName()
-	function := "CopyObject()"
+	function := "GetObject()"
 	args := map[string]interface{}{}
 
-	// Seed random based on current time.
-	rand.Seed(time.Now().Unix())
-
-	// Instantiate new minio client object.
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO client object creation failed", err)
 		return
 	}
+	core := minio.Core{Client: c}
 
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
+	// Generate a new random bucket name.
+	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
+	args["bucketName"] = bucketName
 
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
+	// Make a new bucket.
+	ctx := context.Background()
+	err = c.MakeBucket(ctx, bucketName, minio.MakeBucketOptions{Region: "us-east-1", ObjectLocking: true})
+	if err != nil {
+		logError(testName, function, args, startTime, "", "Make bucket failed", err)
+		return
+	}
+	defer func() {
+		if err = cleanupVersionedBucket(bucketName, c); err != nil {
+			logError(testName, function, args, startTime, "", "CleanupBucket failed", err)
+		}
+	}()
+	objName := "prefix/objectName"
+
+	want := minio.ListMultipartUploadsResult{
+		Bucket:             bucketName,
+		KeyMarker:          "",
+		UploadIDMarker:     "",
+		NextKeyMarker:      "",
+		NextUploadIDMarker: "",
+		EncodingType:       "url",
+		MaxUploads:         1000,
+		IsTruncated:        false,
+		Prefix:             "prefix/objectName",
+		Delimiter:          "/",
+		CommonPrefixes:     nil,
+	}
+	for i := 0; i < 5; i++ {
+		uid, err := core.NewMultipartUpload(ctx, bucketName, objName, minio.PutObjectOptions{})
+		if err != nil {
+			logError(testName, function, args, startTime, "", "NewMultipartUpload failed", err)
+			return
+		}
+		want.Uploads = append(want.Uploads, minio.ObjectMultipartInfo{
+			Initiated:    time.Time{},
+			StorageClass: "",
+			Key:          objName,
+			Size:         0,
+			UploadID:     uid,
+			Err:          nil,
+		})
+
+		for j := 0; j < 5; j++ {
+			cmpGot := func(call string, got minio.ListMultipartUploadsResult) bool {
+				for i := range got.Uploads {
+					got.Uploads[i].Initiated = time.Time{} // want holds zero timestamps, so clear them before comparing
+				}
+				if reflect.DeepEqual(want, got) {
+					return true
+				}
+				logError(testName, function, args, startTime, "", call+" failed", fmt.Errorf("want: %#v\ngot : %#v", want, got))
+				return false // report the mismatch so the caller's `if !cmpGot(...) { return }` actually stops the test
+			}
+			got, err := core.ListMultipartUploads(ctx, bucketName, objName, "", "", "/", 1000)
+			if err != nil {
+				logError(testName, function, args, startTime, "", "ListMultipartUploads failed", err)
+				return
+			}
+			if !cmpGot("ListMultipartUploads-prefix", got) {
+				return
+			}
+			got, err = core.ListMultipartUploads(ctx, bucketName, objName, objName, "", "/", 1000)
+			got.KeyMarker = ""
+			if err != nil {
+				logError(testName, function, args, startTime, "", "ListMultipartUploads failed", err)
+				return
+			}
+			if !cmpGot("ListMultipartUploads-marker", got) {
+				return
+			}
+		}
+		if i > 2 {
+			err = core.AbortMultipartUpload(ctx, bucketName, objName, uid)
+			if err != nil {
+				logError(testName, function, args, startTime, "", "AbortMultipartUpload failed", err)
+				return
+			}
+			want.Uploads = want.Uploads[:len(want.Uploads)-1]
+		}
+	}
+	for _, up := range want.Uploads {
+		err = core.AbortMultipartUpload(ctx, bucketName, objName, up.UploadID)
+		if err != nil {
+			logError(testName, function, args, startTime, "", "AbortMultipartUpload failed", err)
+			return
+		}
+	}
+	logSuccess(testName, function, args, startTime)
+}
+
+func testCopyObjectWithVersioning() {
+	// initialize logging params
+	startTime := time.Now()
+	testName := getFuncName()
+	function := "CopyObject()"
+	args := map[string]interface{}{}
+
+	c, err := NewClient(ClientConfig{})
+	if err != nil {
+		logError(testName, function, args, startTime, "", "MinIO client object creation failed", err)
+		return
+	}
 
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
@@ -1361,27 +1377,12 @@ func testConcurrentCopyObjectWithVersioning() {
 	function := "CopyObject()"
 	args := map[string]interface{}{}
 
-	// Seed random based on current time.
-	rand.Seed(time.Now().Unix())
-
-	// Instantiate new minio client object.
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO client object creation failed", err)
 		return
 	}
 
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 	args["bucketName"] = bucketName
@@ -1522,27 +1523,12 @@ func testComposeObjectWithVersioning() {
 	function := "ComposeObject()"
 	args := map[string]interface{}{}
 
-	// Seed random based on current time.
-	rand.Seed(time.Now().Unix())
-
-	// Instantiate new minio client object.
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO client object creation failed", err)
 		return
 	}
 
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 	args["bucketName"] = bucketName
@@ -1663,27 +1649,12 @@ func testRemoveObjectWithVersioning() {
 	function := "DeleteObject()"
 	args := map[string]interface{}{}
 
-	// Seed random based on current time.
-	rand.Seed(time.Now().Unix())
-
-	// Instantiate new minio client object.
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO client object creation failed", err)
 		return
 	}
 
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 	args["bucketName"] = bucketName
@@ -1776,27 +1747,12 @@ func testRemoveObjectsWithVersioning() {
 	function := "DeleteObjects()"
 	args := map[string]interface{}{}
 
-	// Seed random based on current time.
-	rand.Seed(time.Now().Unix())
-
-	// Instantiate new minio client object.
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO client object creation failed", err)
 		return
 	}
 
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 	args["bucketName"] = bucketName
@@ -1872,27 +1828,12 @@ func testObjectTaggingWithVersioning() {
 	function := "{Get,Set,Remove}ObjectTagging()"
 	args := map[string]interface{}{}
 
-	// Seed random based on current time.
-	rand.Seed(time.Now().Unix())
-
-	// Instantiate new minio client object.
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO client object creation failed", err)
 		return
 	}
 
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 	args["bucketName"] = bucketName
@@ -2040,27 +1981,12 @@ func testPutObjectWithChecksums() {
 		return
 	}
 
-	// Seed random based on current time.
-	rand.Seed(time.Now().Unix())
-
-	// Instantiate new minio client object.
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO client object creation failed", err)
 		return
 	}
 
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 	args["bucketName"] = bucketName
@@ -2074,22 +2000,19 @@ func testPutObjectWithChecksums() {
 
 	defer cleanupBucket(bucketName, c)
 	tests := []struct {
-		header string
-		hasher hash.Hash
-
-		// Checksum values
-		ChecksumCRC32  string
-		ChecksumCRC32C string
-		ChecksumSHA1   string
-		ChecksumSHA256 string
+		cs minio.ChecksumType
 	}{
-		{header: "x-amz-checksum-crc32", hasher: crc32.NewIEEE()},
-		{header: "x-amz-checksum-crc32c", hasher: crc32.New(crc32.MakeTable(crc32.Castagnoli))},
-		{header: "x-amz-checksum-sha1", hasher: sha1.New()},
-		{header: "x-amz-checksum-sha256", hasher: sha256.New()},
+		{cs: minio.ChecksumCRC32C},
+		{cs: minio.ChecksumCRC32},
+		{cs: minio.ChecksumSHA1},
+		{cs: minio.ChecksumSHA256},
+		{cs: minio.ChecksumCRC64NVME},
 	}
 
-	for i, test := range tests {
+	for _, test := range tests {
+		if os.Getenv("MINT_NO_FULL_OBJECT") != "" && test.cs.FullObjectRequested() {
+			continue
+		}
 		bufSize := dataFileMap["datafile-10-kB"]
 
 		// Save the data
@@ -2110,29 +2033,27 @@ func testPutObjectWithChecksums() {
 			logError(testName, function, args, startTime, "", "Read failed", err)
 			return
 		}
-		h := test.hasher
+		h := test.cs.Hasher()
 		h.Reset()
-		// Wrong CRC.
-		meta[test.header] = base64.StdEncoding.EncodeToString(h.Sum(nil))
+
+		// Test with a bad CRC - we haven't called h.Write(b), so this is a checksum of empty data
+		meta[test.cs.Key()] = base64.StdEncoding.EncodeToString(h.Sum(nil))
 		args["metadata"] = meta
 		args["range"] = "false"
+		args["checksum"] = test.cs.String()
 
 		resp, err := c.PutObject(context.Background(), bucketName, objectName, bytes.NewReader(b), int64(bufSize), minio.PutObjectOptions{
 			DisableMultipart: true,
 			UserMetadata:     meta,
 		})
 		if err == nil {
-			if i == 0 && resp.ChecksumCRC32 == "" {
-				logIgnored(testName, function, args, startTime, "Checksums does not appear to be supported by backend")
-				return
-			}
-			logError(testName, function, args, startTime, "", "PutObject failed", err)
+			logError(testName, function, args, startTime, "", "PutObject did not fail on wrong CRC", err)
 			return
 		}
 
 		// Set correct CRC.
 		h.Write(b)
-		meta[test.header] = base64.StdEncoding.EncodeToString(h.Sum(nil))
+		meta[test.cs.Key()] = base64.StdEncoding.EncodeToString(h.Sum(nil))
 		reader.Close()
 
 		resp, err = c.PutObject(context.Background(), bucketName, objectName, bytes.NewReader(b), int64(bufSize), minio.PutObjectOptions{
@@ -2148,6 +2069,7 @@ func testPutObjectWithChecksums() {
 		cmpChecksum(resp.ChecksumSHA1, meta["x-amz-checksum-sha1"])
 		cmpChecksum(resp.ChecksumCRC32, meta["x-amz-checksum-crc32"])
 		cmpChecksum(resp.ChecksumCRC32C, meta["x-amz-checksum-crc32c"])
+		cmpChecksum(resp.ChecksumCRC64NVME, meta["x-amz-checksum-crc64nvme"])
 
 		// Read the data back
 		gopts := minio.GetObjectOptions{Checksum: true}
@@ -2167,6 +2089,7 @@ func testPutObjectWithChecksums() {
 		cmpChecksum(st.ChecksumSHA1, meta["x-amz-checksum-sha1"])
 		cmpChecksum(st.ChecksumCRC32, meta["x-amz-checksum-crc32"])
 		cmpChecksum(st.ChecksumCRC32C, meta["x-amz-checksum-crc32c"])
+		cmpChecksum(st.ChecksumCRC64NVME, meta["x-amz-checksum-crc64nvme"])
 
 		if st.Size != int64(bufSize) {
 			logError(testName, function, args, startTime, "", "Number of bytes returned by PutObject does not match GetObject, expected "+string(bufSize)+" got "+string(st.Size), err)
@@ -2210,16 +2133,16 @@ func testPutObjectWithChecksums() {
 		cmpChecksum(st.ChecksumSHA1, "")
 		cmpChecksum(st.ChecksumCRC32, "")
 		cmpChecksum(st.ChecksumCRC32C, "")
+		cmpChecksum(st.ChecksumCRC64NVME, "")
 
 		delete(args, "range")
 		delete(args, "metadata")
+		logSuccess(testName, function, args, startTime)
 	}
-
-	logSuccess(testName, function, args, startTime)
 }
 
 // Test PutObject with custom checksums.
-func testPutMultipartObjectWithChecksums() {
+func testPutObjectWithTrailingChecksums() {
 	// initialize logging params
 	startTime := time.Now()
 	testName := getFuncName()
@@ -2227,7 +2150,7 @@ func testPutMultipartObjectWithChecksums() {
 	args := map[string]interface{}{
 		"bucketName": "",
 		"objectName": "",
-		"opts":       "minio.PutObjectOptions{UserMetadata: metadata, Progress: progress}",
+		"opts":       "minio.PutObjectOptions{UserMetadata: metadata, Progress: progress, TrailChecksum: xxx}",
 	}
 
 	if !isFullMode() {
@@ -2235,27 +2158,12 @@ func testPutMultipartObjectWithChecksums() {
 		return
 	}
 
-	// Seed random based on current time.
-	rand.Seed(time.Now().Unix())
-
-	// Instantiate new minio client object.
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{TrailingHeaders: true})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO client object creation failed", err)
 		return
 	}
 
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 	args["bucketName"] = bucketName
@@ -2267,48 +2175,22 @@ func testPutMultipartObjectWithChecksums() {
 		return
 	}
 
-	hashMultiPart := func(b []byte, partSize int, hasher hash.Hash) string {
-		r := bytes.NewReader(b)
-		tmp := make([]byte, partSize)
-		parts := 0
-		var all []byte
-		for {
-			n, err := io.ReadFull(r, tmp)
-			if err != nil && err != io.ErrUnexpectedEOF {
-				logError(testName, function, args, startTime, "", "Calc crc failed", err)
-			}
-			if n == 0 {
-				break
-			}
-			parts++
-			hasher.Reset()
-			hasher.Write(tmp[:n])
-			all = append(all, hasher.Sum(nil)...)
-			if err != nil {
-				break
-			}
-		}
-		hasher.Reset()
-		hasher.Write(all)
-		return fmt.Sprintf("%s-%d", base64.StdEncoding.EncodeToString(hasher.Sum(nil)), parts)
-	}
 	defer cleanupBucket(bucketName, c)
 	tests := []struct {
-		header string
-		hasher hash.Hash
-
-		// Checksum values
-		ChecksumCRC32  string
-		ChecksumCRC32C string
-		ChecksumSHA1   string
-		ChecksumSHA256 string
+		cs minio.ChecksumType
 	}{
-		// Currently there is no way to override the checksum type.
-		{header: "x-amz-checksum-crc32c", hasher: crc32.New(crc32.MakeTable(crc32.Castagnoli)), ChecksumCRC32C: "OpEx0Q==-13"},
+		{cs: minio.ChecksumCRC64NVME},
+		{cs: minio.ChecksumCRC32C},
+		{cs: minio.ChecksumCRC32},
+		{cs: minio.ChecksumSHA1},
+		{cs: minio.ChecksumSHA256},
 	}
-
 	for _, test := range tests {
-		bufSize := dataFileMap["datafile-129-MB"]
+		if os.Getenv("MINT_NO_FULL_OBJECT") != "" && test.cs.FullObjectRequested() {
+			continue
+		}
+		function := "PutObject(bucketName, objectName, reader,size, opts)"
+		bufSize := dataFileMap["datafile-10-kB"]
 
 		// Save the data
 		objectName := randString(60, rand.NewSource(time.Now().UnixNano()), "")
@@ -2316,79 +2198,360 @@ func testPutMultipartObjectWithChecksums() {
 
 		cmpChecksum := func(got, want string) {
 			if want != got {
-				// logError(testName, function, args, startTime, "", "checksum mismatch", fmt.Errorf("want %s, got %s", want, got))
-				fmt.Printf("want %s, got %s\n", want, got)
+				logError(testName, function, args, startTime, "", "checksum mismatch", fmt.Errorf("want %s, got %s", want, got))
 				return
 			}
 		}
 
-		const partSize = 10 << 20
-		reader := getDataReader("datafile-129-MB")
+		meta := map[string]string{}
+		reader := getDataReader("datafile-10-kB")
 		b, err := io.ReadAll(reader)
 		if err != nil {
 			logError(testName, function, args, startTime, "", "Read failed", err)
 			return
 		}
-		reader.Close()
-		h := test.hasher
+		h := test.cs.Hasher()
 		h.Reset()
-		test.ChecksumCRC32C = hashMultiPart(b, partSize, test.hasher)
 
-		// Set correct CRC.
+		// Record request args; this upload sets opts.Checksum and is expected to succeed (no wrong CRC is sent).
+		args["metadata"] = meta
+		args["range"] = "false"
+		args["checksum"] = test.cs.String()
 
-		resp, err := c.PutObject(context.Background(), bucketName, objectName, io.NopCloser(bytes.NewReader(b)), int64(bufSize), minio.PutObjectOptions{
+		resp, err := c.PutObject(context.Background(), bucketName, objectName, bytes.NewReader(b), int64(bufSize), minio.PutObjectOptions{
+			DisableMultipart:     true,
 			DisableContentSha256: true,
-			DisableMultipart:     false,
-			UserMetadata:         nil,
-			PartSize:             partSize,
+			UserMetadata:         meta,
+			Checksum:             test.cs,
 		})
 		if err != nil {
 			logError(testName, function, args, startTime, "", "PutObject failed", err)
 			return
 		}
-		cmpChecksum(resp.ChecksumSHA256, test.ChecksumSHA256)
-		cmpChecksum(resp.ChecksumSHA1, test.ChecksumSHA1)
-		cmpChecksum(resp.ChecksumCRC32, test.ChecksumCRC32)
-		cmpChecksum(resp.ChecksumCRC32C, test.ChecksumCRC32C)
+
+		h.Write(b)
+		meta[test.cs.Key()] = base64.StdEncoding.EncodeToString(h.Sum(nil))
+
+		cmpChecksum(resp.ChecksumSHA256, meta["x-amz-checksum-sha256"])
+		cmpChecksum(resp.ChecksumSHA1, meta["x-amz-checksum-sha1"])
+		cmpChecksum(resp.ChecksumCRC32, meta["x-amz-checksum-crc32"])
+		cmpChecksum(resp.ChecksumCRC32C, meta["x-amz-checksum-crc32c"])
+		cmpChecksum(resp.ChecksumCRC64NVME, meta["x-amz-checksum-crc64nvme"])
 
 		// Read the data back
 		gopts := minio.GetObjectOptions{Checksum: true}
-		gopts.PartNumber = 2
 
-		// We cannot use StatObject, since it ignores partnumber.
+		function = "GetObject(...)"
 		r, err := c.GetObject(context.Background(), bucketName, objectName, gopts)
 		if err != nil {
 			logError(testName, function, args, startTime, "", "GetObject failed", err)
 			return
 		}
-		io.Copy(io.Discard, r)
+
 		st, err := r.Stat()
 		if err != nil {
 			logError(testName, function, args, startTime, "", "Stat failed", err)
 			return
 		}
+		cmpChecksum(st.ChecksumSHA256, meta["x-amz-checksum-sha256"])
+		cmpChecksum(st.ChecksumSHA1, meta["x-amz-checksum-sha1"])
+		cmpChecksum(st.ChecksumCRC32, meta["x-amz-checksum-crc32"])
+		cmpChecksum(st.ChecksumCRC32C, meta["x-amz-checksum-crc32c"])
+		cmpChecksum(resp.ChecksumCRC64NVME, meta["x-amz-checksum-crc64nvme"])
 
-		// Test part 2 checksum...
-		h.Reset()
-		h.Write(b[partSize : 2*partSize])
-		got := base64.StdEncoding.EncodeToString(h.Sum(nil))
-		if test.ChecksumSHA256 != "" {
-			cmpChecksum(st.ChecksumSHA256, got)
+		if st.Size != int64(bufSize) {
+			logError(testName, function, args, startTime, "", "Number of bytes returned by PutObject does not match GetObject, expected "+string(bufSize)+" got "+string(st.Size), err)
+			return
 		}
-		if test.ChecksumSHA1 != "" {
-			cmpChecksum(st.ChecksumSHA1, got)
+
+		if err := r.Close(); err != nil {
+			logError(testName, function, args, startTime, "", "Object Close failed", err)
+			return
 		}
-		if test.ChecksumCRC32 != "" {
-			cmpChecksum(st.ChecksumCRC32, got)
+		if err := r.Close(); err == nil {
+			logError(testName, function, args, startTime, "", "Object already closed, should respond with error", err)
+			return
 		}
-		if test.ChecksumCRC32C != "" {
-			cmpChecksum(st.ChecksumCRC32C, got)
+
+		function = "GetObject( Range...)"
+		args["range"] = "true"
+		err = gopts.SetRange(100, 1000)
+		if err != nil {
+			logError(testName, function, args, startTime, "", "SetRange failed", err)
+			return
+		}
+		r, err = c.GetObject(context.Background(), bucketName, objectName, gopts)
+		if err != nil {
+			logError(testName, function, args, startTime, "", "GetObject failed", err)
+			return
+		}
+
+		b, err = io.ReadAll(r)
+		if err != nil {
+			logError(testName, function, args, startTime, "", "Read failed", err)
+			return
+		}
+		st, err = r.Stat()
+		if err != nil {
+			logError(testName, function, args, startTime, "", "Stat failed", err)
+			return
+		}
+
+		// Range requests should return empty checksums...
+		cmpChecksum(st.ChecksumSHA256, "")
+		cmpChecksum(st.ChecksumSHA1, "")
+		cmpChecksum(st.ChecksumCRC32, "")
+		cmpChecksum(st.ChecksumCRC32C, "")
+		cmpChecksum(st.ChecksumCRC64NVME, "")
+
+		function = "GetObjectAttributes(...)"
+		s, err := c.GetObjectAttributes(context.Background(), bucketName, objectName, minio.ObjectAttributesOptions{})
+		if err != nil {
+			logError(testName, function, args, startTime, "", "GetObjectAttributes failed", err)
+			return
 		}
+		cmpChecksum(s.Checksum.ChecksumSHA256, meta["x-amz-checksum-sha256"])
+		cmpChecksum(s.Checksum.ChecksumSHA1, meta["x-amz-checksum-sha1"])
+		cmpChecksum(s.Checksum.ChecksumCRC32, meta["x-amz-checksum-crc32"])
+		cmpChecksum(s.Checksum.ChecksumCRC32C, meta["x-amz-checksum-crc32c"])
 
+		delete(args, "range")
 		delete(args, "metadata")
+		logSuccess(testName, function, args, startTime)
+	}
+}
+
+// testPutMultipartObjectWithChecksums uploads a multipart object once per checksum type and compares the checksum reported by PutObject, StatObject, GetObjectAttributes and a part-2 GetObject against a locally computed value; trailing is passed to ClientConfig.TrailingHeaders and decides whether opts.Checksum is set.
+func testPutMultipartObjectWithChecksums(trailing bool) {
+	// initialize logging params
+	startTime := time.Now()
+	testName := getFuncName()
+	function := "PutObject(bucketName, objectName, reader,size, opts)"
+	args := map[string]interface{}{
+		"bucketName": "",
+		"objectName": "",
+		"opts":       fmt.Sprintf("minio.PutObjectOptions{UserMetadata: metadata, Trailing: %v}", trailing),
 	}
 
-	logSuccess(testName, function, args, startTime)
+	if !isFullMode() {
+		logIgnored(testName, function, args, startTime, "Skipping functional tests for short/quick runs")
+		return
+	}
+
+	c, err := NewClient(ClientConfig{TrailingHeaders: trailing})
+	if err != nil {
+		logError(testName, function, args, startTime, "", "MinIO client object creation failed", err)
+		return
+	}
+
+	// Generate a new random bucket name.
+	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
+	args["bucketName"] = bucketName
+
+	// Make a new bucket.
+	err = c.MakeBucket(context.Background(), bucketName, minio.MakeBucketOptions{Region: "us-east-1"})
+	if err != nil {
+		logError(testName, function, args, startTime, "", "Make bucket failed", err)
+		return
+	}
+
+	hashMultiPart := func(b []byte, partSize int, cs minio.ChecksumType) string {
+		r := bytes.NewReader(b)
+		hasher := cs.Hasher()
+		if cs.FullObjectRequested() {
+			partSize = len(b)
+		}
+		tmp := make([]byte, partSize)
+		parts := 0
+		var all []byte
+		for {
+			n, err := io.ReadFull(r, tmp)
+			if err != nil && err != io.ErrUnexpectedEOF && err != io.EOF {
+				logError(testName, function, args, startTime, "", "Calc crc failed", err)
+			}
+			if n == 0 {
+				break
+			}
+			parts++
+			hasher.Reset()
+			hasher.Write(tmp[:n])
+			all = append(all, hasher.Sum(nil)...)
+			if err != nil {
+				break
+			}
+		}
+		if parts == 1 {
+			return base64.StdEncoding.EncodeToString(hasher.Sum(nil))
+		}
+		hasher.Reset()
+		hasher.Write(all)
+		return fmt.Sprintf("%s-%d", base64.StdEncoding.EncodeToString(hasher.Sum(nil)), parts)
+	}
+	defer cleanupBucket(bucketName, c)
+	tests := []struct {
+		cs minio.ChecksumType
+	}{
+		{cs: minio.ChecksumFullObjectCRC32},
+		{cs: minio.ChecksumFullObjectCRC32C},
+		{cs: minio.ChecksumCRC64NVME},
+		{cs: minio.ChecksumCRC32C},
+		{cs: minio.ChecksumCRC32},
+		{cs: minio.ChecksumSHA1},
+		{cs: minio.ChecksumSHA256},
+	}
+
+	for _, test := range tests {
+		if os.Getenv("MINT_NO_FULL_OBJECT") != "" && test.cs.FullObjectRequested() {
+			continue
+		}
+
+		args["section"] = "prep"
+		bufSize := dataFileMap["datafile-129-MB"]
+		// Pick a fresh random object name for this checksum type.
+		objectName := randString(60, rand.NewSource(time.Now().UnixNano()), "")
+		args["objectName"] = objectName
+		args["checksum"] = test.cs.String()
+
+		cmpChecksum := func(got, want string) {
+			if want != got {
+				logError(testName, function, args, startTime, "", "checksum mismatch", fmt.Errorf("want %s, got %s", want, got))
+				// fmt.Printf("want %s, got %s\n", want, got)
+				return
+			}
+		}
+
+		const partSize = 10 << 20
+		reader := getDataReader("datafile-129-MB")
+		b, err := io.ReadAll(reader)
+		if err != nil {
+			logError(testName, function, args, startTime, "", "Read failed", err)
+			return
+		}
+		reader.Close()
+		h := test.cs.Hasher()
+		h.Reset()
+		want := hashMultiPart(b, partSize, test.cs)
+
+		var cs minio.ChecksumType
+		rd := io.Reader(io.NopCloser(bytes.NewReader(b)))
+		if trailing {
+			cs = test.cs
+			rd = bytes.NewReader(b)
+		}
+
+		// Upload in 10 MiB parts. AutoChecksum is always set; Checksum is set only when trailing is true.
+		args["section"] = "PutObject"
+		resp, err := c.PutObject(context.Background(), bucketName, objectName, rd, int64(bufSize), minio.PutObjectOptions{
+			DisableContentSha256: true,
+			DisableMultipart:     false,
+			UserMetadata:         nil,
+			PartSize:             partSize,
+			AutoChecksum:         test.cs,
+			Checksum:             cs,
+		})
+		if err != nil {
+			logError(testName, function, args, startTime, "", "PutObject failed", err)
+			return
+		}
+
+		switch test.cs.Base() {
+		case minio.ChecksumCRC32C:
+			cmpChecksum(resp.ChecksumCRC32C, want)
+		case minio.ChecksumCRC32:
+			cmpChecksum(resp.ChecksumCRC32, want)
+		case minio.ChecksumSHA1:
+			cmpChecksum(resp.ChecksumSHA1, want)
+		case minio.ChecksumSHA256:
+			cmpChecksum(resp.ChecksumSHA256, want)
+		case minio.ChecksumCRC64NVME:
+			cmpChecksum(resp.ChecksumCRC64NVME, want)
+		}
+
+		args["section"] = "HeadObject"
+		st, err := c.StatObject(context.Background(), bucketName, objectName, minio.StatObjectOptions{Checksum: true})
+		if err != nil {
+			logError(testName, function, args, startTime, "", "StatObject failed", err)
+			return
+		}
+		switch test.cs.Base() {
+		case minio.ChecksumCRC32C:
+			cmpChecksum(st.ChecksumCRC32C, want)
+		case minio.ChecksumCRC32:
+			cmpChecksum(st.ChecksumCRC32, want)
+		case minio.ChecksumSHA1:
+			cmpChecksum(st.ChecksumSHA1, want)
+		case minio.ChecksumSHA256:
+			cmpChecksum(st.ChecksumSHA256, want)
+		case minio.ChecksumCRC64NVME:
+			cmpChecksum(st.ChecksumCRC64NVME, want)
+		}
+
+		args["section"] = "GetObjectAttributes"
+		s, err := c.GetObjectAttributes(context.Background(), bucketName, objectName, minio.ObjectAttributesOptions{})
+		if err != nil {
+			logError(testName, function, args, startTime, "", "GetObjectAttributes failed", err)
+			return
+		}
+
+		if strings.ContainsRune(want, '-') {
+			want = want[:strings.IndexByte(want, '-')]
+		}
+		switch test.cs {
+		// Full Object CRC does not return anything with GetObjectAttributes
+		case minio.ChecksumCRC32C:
+			cmpChecksum(s.Checksum.ChecksumCRC32C, want)
+		case minio.ChecksumCRC32:
+			cmpChecksum(s.Checksum.ChecksumCRC32, want)
+		case minio.ChecksumSHA1:
+			cmpChecksum(s.Checksum.ChecksumSHA1, want)
+		case minio.ChecksumSHA256:
+			cmpChecksum(s.Checksum.ChecksumSHA256, want)
+		}
+
+		// Request part 2 only, with Checksum enabled in the get options.
+		gopts := minio.GetObjectOptions{Checksum: true}
+		gopts.PartNumber = 2
+
+		// We cannot use StatObject, since it ignores partnumber.
+		args["section"] = "GetObject-Part"
+		r, err := c.GetObject(context.Background(), bucketName, objectName, gopts)
+		if err != nil {
+			logError(testName, function, args, startTime, "", "GetObject failed", err)
+			return
+		}
+		io.Copy(io.Discard, r)
+		st, err = r.Stat()
+		if err != nil {
+			logError(testName, function, args, startTime, "", "Stat failed", err)
+			return
+		}
+
+		// Expected part 2 checksum: hash of the bytes in [partSize, 2*partSize).
+		h.Reset()
+		h.Write(b[partSize : 2*partSize])
+		want = base64.StdEncoding.EncodeToString(h.Sum(nil))
+
+		switch test.cs {
+		// Full Object CRC does not return any part CRC for whatever reason.
+		case minio.ChecksumCRC32C:
+			cmpChecksum(st.ChecksumCRC32C, want)
+		case minio.ChecksumCRC32:
+			cmpChecksum(st.ChecksumCRC32, want)
+		case minio.ChecksumSHA1:
+			cmpChecksum(st.ChecksumSHA1, want)
+		case minio.ChecksumSHA256:
+			cmpChecksum(st.ChecksumSHA256, want)
+		case minio.ChecksumCRC64NVME:
+			// AWS doesn't return part checksum, but may in the future.
+			if st.ChecksumCRC64NVME != "" {
+				cmpChecksum(st.ChecksumCRC64NVME, want)
+			}
+		}
+
+		delete(args, "metadata")
+		delete(args, "section")
+		logSuccess(testName, function, args, startTime)
+	}
 }
 
 // Test PutObject with trailing checksums.
@@ -2408,25 +2571,12 @@ func testTrailingChecksums() {
 		return
 	}
 
-	// Instantiate new minio client object.
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:           credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport:       createHTTPTransport(),
-			Secure:          mustParseBool(os.Getenv(enableHTTPS)),
-			TrailingHeaders: true,
-		})
+	c, err := NewClient(ClientConfig{TrailingHeaders: true})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO client object creation failed", err)
 		return
 	}
 
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 	args["bucketName"] = bucketName
@@ -2548,7 +2698,6 @@ func testTrailingChecksums() {
 		test.ChecksumCRC32C = hashMultiPart(b, int(test.PO.PartSize), test.hasher)
 
 		// Set correct CRC.
-		// c.TraceOn(os.Stderr)
 		resp, err := c.PutObject(context.Background(), bucketName, objectName, bytes.NewReader(b), int64(bufSize), test.PO)
 		if err != nil {
 			logError(testName, function, args, startTime, "", "PutObject failed", err)
@@ -2599,6 +2748,7 @@ func testTrailingChecksums() {
 		}
 
 		delete(args, "metadata")
+		logSuccess(testName, function, args, startTime)
 	}
 }
 
@@ -2619,25 +2769,12 @@ func testPutObjectWithAutomaticChecksums() {
 		return
 	}
 
-	// Seed random based on current time.
-	rand.Seed(time.Now().Unix())
-
-	// Instantiate new minio client object.
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:           credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport:       createHTTPTransport(),
-			Secure:          mustParseBool(os.Getenv(enableHTTPS)),
-			TrailingHeaders: true,
-		})
+	c, err := NewClient(ClientConfig{TrailingHeaders: true})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO client object creation failed", err)
 		return
 	}
 
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 	args["bucketName"] = bucketName
@@ -2664,8 +2801,6 @@ func testPutObjectWithAutomaticChecksums() {
 		{header: "x-amz-checksum-crc32c", hasher: crc32.New(crc32.MakeTable(crc32.Castagnoli))},
 	}
 
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
 	// defer c.TraceOff()
 
 	for i, test := range tests {
@@ -2775,20 +2910,12 @@ func testGetObjectAttributes() {
 		return
 	}
 
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			TrailingHeaders: true,
-			Creds:           credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport:       createHTTPTransport(),
-			Secure:          mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{TrailingHeaders: true})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO client object creation failed", err)
 		return
 	}
 
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 	args["bucketName"] = bucketName
 	err = c.MakeBucket(
@@ -2847,8 +2974,8 @@ func testGetObjectAttributes() {
 		testFiles[i].UploadInfo, err = c.PutObject(context.Background(), v.Bucket, v.Object, reader, int64(bufSize), minio.PutObjectOptions{
 			ContentType:    v.ContentType,
 			SendContentMd5: v.SendContentMd5,
+			Checksum:       minio.ChecksumCRC32C,
 		})
-
 		if err != nil {
 			logError(testName, function, args, startTime, "", "PutObject failed", err)
 			return
@@ -2929,7 +3056,7 @@ func testGetObjectAttributes() {
 		test: objectAttributesTestOptions{
 			TestFileName:    "file1",
 			StorageClass:    "STANDARD",
-			HasFullChecksum: false,
+			HasFullChecksum: true,
 		},
 	}
 
@@ -2982,19 +3109,12 @@ func testGetObjectAttributesSSECEncryption() {
 		return
 	}
 
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			TrailingHeaders: true,
-			Creds:           credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Secure:          mustParseBool(os.Getenv(enableHTTPS)),
-			Transport:       createHTTPTransport(),
-		})
+	c, err := NewClient(ClientConfig{TrailingHeaders: true})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO client object creation failed", err)
 		return
 	}
 
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 	args["bucketName"] = bucketName
 	err = c.MakeBucket(
@@ -3018,9 +3138,10 @@ func testGetObjectAttributesSSECEncryption() {
 
 	info, err := c.PutObject(context.Background(), bucketName, objectName, reader, int64(bufSize), minio.PutObjectOptions{
 		ContentType:          "content/custom",
-		SendContentMd5:       true,
+		SendContentMd5:       false,
 		ServerSideEncryption: sse,
 		PartSize:             uint64(bufSize) / 2,
+		Checksum:             minio.ChecksumCRC32C,
 	})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "PutObject failed", err)
@@ -3040,9 +3161,9 @@ func testGetObjectAttributesSSECEncryption() {
 		ETag:             info.ETag,
 		NumberOfParts:    2,
 		ObjectSize:       int(info.Size),
-		HasFullChecksum:  false,
+		HasFullChecksum:  true,
 		HasParts:         true,
-		HasPartChecksums: false,
+		HasPartChecksums: true,
 	})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "Validating GetObjectsAttributes response failed", err)
@@ -3067,19 +3188,12 @@ func testGetObjectAttributesErrorCases() {
 		return
 	}
 
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			TrailingHeaders: true,
-			Creds:           credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport:       createHTTPTransport(),
-			Secure:          mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{TrailingHeaders: true})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO client object creation failed", err)
 		return
 	}
 
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
 	unknownBucket := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-bucket-")
 	unknownObject := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-object-")
 
@@ -3231,16 +3345,10 @@ func validateObjectAttributeRequest(OA *minio.ObjectAttributes, opts *minio.Obje
 		}
 	}
 
-	hasFullObjectChecksum := true
-	if OA.Checksum.ChecksumCRC32 == "" {
-		if OA.Checksum.ChecksumCRC32C == "" {
-			if OA.Checksum.ChecksumSHA1 == "" {
-				if OA.Checksum.ChecksumSHA256 == "" {
-					hasFullObjectChecksum = false
-				}
-			}
-		}
-	}
+	hasFullObjectChecksum := (OA.Checksum.ChecksumCRC32 != "" ||
+		OA.Checksum.ChecksumCRC32C != "" ||
+		OA.Checksum.ChecksumSHA1 != "" ||
+		OA.Checksum.ChecksumSHA256 != "")
 
 	if test.HasFullChecksum {
 		if !hasFullObjectChecksum {
@@ -3329,27 +3437,12 @@ func testPutObjectWithMetadata() {
 		return
 	}
 
-	// Seed random based on current time.
-	rand.Seed(time.Now().Unix())
-
-	// Instantiate new minio client object.
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO client object creation failed", err)
 		return
 	}
 
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 	args["bucketName"] = bucketName
@@ -3436,27 +3529,12 @@ func testPutObjectWithContentLanguage() {
 		"opts":       "",
 	}
 
-	// Seed random based on current time.
-	rand.Seed(time.Now().Unix())
-
-	// Instantiate new minio client object.
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO client object creation failed", err)
 		return
 	}
 
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 	args["bucketName"] = bucketName
@@ -3506,27 +3584,12 @@ func testPutObjectStreaming() {
 		"opts":       "",
 	}
 
-	// Seed random based on current time.
-	rand.Seed(time.Now().Unix())
-
-	// Instantiate new minio client object.
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO client object creation failed", err)
 		return
 	}
 
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 	args["bucketName"] = bucketName
@@ -3578,27 +3641,12 @@ func testGetObjectSeekEnd() {
 	function := "GetObject(bucketName, objectName)"
 	args := map[string]interface{}{}
 
-	// Seed random based on current time.
-	rand.Seed(time.Now().Unix())
-
-	// Instantiate new minio client object.
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO client object creation failed", err)
 		return
 	}
 
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 	args["bucketName"] = bucketName
@@ -3701,27 +3749,12 @@ func testGetObjectClosedTwice() {
 	function := "GetObject(bucketName, objectName)"
 	args := map[string]interface{}{}
 
-	// Seed random based on current time.
-	rand.Seed(time.Now().Unix())
-
-	// Instantiate new minio client object.
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO client object creation failed", err)
 		return
 	}
 
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 	args["bucketName"] = bucketName
@@ -3792,26 +3825,13 @@ func testRemoveObjectsContext() {
 		"bucketName": "",
 	}
 
-	// Seed random based on current tie.
-	rand.Seed(time.Now().Unix())
-
 	// Instantiate new minio client.
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO client object creation failed", err)
 		return
 	}
 
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-	// Enable tracing, write to stdout.
-	// c.TraceOn(os.Stderr)
-
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 	args["bucketName"] = bucketName
@@ -3889,27 +3909,12 @@ func testRemoveMultipleObjects() {
 		"bucketName": "",
 	}
 
-	// Seed random based on current time.
-	rand.Seed(time.Now().Unix())
-
-	// Instantiate new minio client object.
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO client object creation failed", err)
 		return
 	}
 
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
-	// Enable tracing, write to stdout.
-	// c.TraceOn(os.Stderr)
-
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 	args["bucketName"] = bucketName
@@ -3973,27 +3978,12 @@ func testRemoveMultipleObjectsWithResult() {
 		"bucketName": "",
 	}
 
-	// Seed random based on current time.
-	rand.Seed(time.Now().Unix())
-
-	// Instantiate new minio client object.
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO client object creation failed", err)
 		return
 	}
 
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
-	// Enable tracing, write to stdout.
-	// c.TraceOn(os.Stderr)
-
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 	args["bucketName"] = bucketName
@@ -4109,27 +4099,12 @@ func testFPutObjectMultipart() {
 		"opts":       "",
 	}
 
-	// Seed random based on current time.
-	rand.Seed(time.Now().Unix())
-
-	// Instantiate new minio client object.
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO client object creation failed", err)
 		return
 	}
 
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 	args["bucketName"] = bucketName
@@ -4215,27 +4190,12 @@ func testFPutObject() {
 		"opts":       "",
 	}
 
-	// Seed random based on current time.
-	rand.Seed(time.Now().Unix())
-
-	// Instantiate new minio client object.
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO client object creation failed", err)
 		return
 	}
 
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 	location := "us-east-1"
@@ -4385,27 +4345,13 @@ func testFPutObjectContext() {
 		"fileName":   "",
 		"opts":       "",
 	}
-	// Seed random based on current time.
-	rand.Seed(time.Now().Unix())
 
-	// Instantiate new minio client object.
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO client object creation failed", err)
 		return
 	}
 
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 	args["bucketName"] = bucketName
@@ -4486,27 +4432,13 @@ func testFPutObjectContextV2() {
 		"objectName": "",
 		"opts":       "minio.PutObjectOptions{ContentType:objectContentType}",
 	}
-	// Seed random based on current time.
-	rand.Seed(time.Now().Unix())
 
-	// Instantiate new minio client object.
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV2(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{CredsV2: true})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO client object creation failed", err)
 		return
 	}
 
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 	args["bucketName"] = bucketName
@@ -4591,24 +4523,12 @@ func testPutObjectContext() {
 		"opts":       "",
 	}
 
-	// Instantiate new minio client object.
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO client object creation failed", err)
 		return
 	}
 
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
 	// Make a new bucket.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 	args["bucketName"] = bucketName
@@ -4661,27 +4581,12 @@ func testGetObjectS3Zip() {
 	function := "GetObject(bucketName, objectName)"
 	args := map[string]interface{}{"x-minio-extract": true}
 
-	// Seed random based on current time.
-	rand.Seed(time.Now().Unix())
-
-	// Instantiate new minio client object.
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO client object creation failed", err)
 		return
 	}
 
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 	args["bucketName"] = bucketName
@@ -4845,27 +4750,12 @@ func testGetObjectReadSeekFunctional() {
 	function := "GetObject(bucketName, objectName)"
 	args := map[string]interface{}{}
 
-	// Seed random based on current time.
-	rand.Seed(time.Now().Unix())
-
-	// Instantiate new minio client object.
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO client object creation failed", err)
 		return
 	}
 
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 	args["bucketName"] = bucketName
@@ -5015,27 +4905,12 @@ func testGetObjectReadAtFunctional() {
 	function := "GetObject(bucketName, objectName)"
 	args := map[string]interface{}{}
 
-	// Seed random based on current time.
-	rand.Seed(time.Now().Unix())
-
-	// Instantiate new minio client object.
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO client object creation failed", err)
 		return
 	}
 
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 	args["bucketName"] = bucketName
@@ -5193,27 +5068,12 @@ func testGetObjectReadAtWhenEOFWasReached() {
 	function := "GetObject(bucketName, objectName)"
 	args := map[string]interface{}{}
 
-	// Seed random based on current time.
-	rand.Seed(time.Now().Unix())
-
-	// Instantiate new minio client object.
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO client object creation failed", err)
 		return
 	}
 
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 	args["bucketName"] = bucketName
@@ -5313,27 +5173,12 @@ func testPresignedPostPolicy() {
 		"policy": "",
 	}
 
-	// Seed random based on current time.
-	rand.Seed(time.Now().Unix())
-
-	// Instantiate new minio client object
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO client object creation failed", err)
 		return
 	}
 
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 
@@ -5361,50 +5206,22 @@ func testPresignedPostPolicy() {
 		return
 	}
 
-	// Save the data
-	_, err = c.PutObject(context.Background(), bucketName, objectName, bytes.NewReader(buf), int64(len(buf)), minio.PutObjectOptions{ContentType: "binary/octet-stream"})
-	if err != nil {
-		logError(testName, function, args, startTime, "", "PutObject failed", err)
-		return
-	}
-
 	policy := minio.NewPostPolicy()
-
-	if err := policy.SetBucket(""); err == nil {
-		logError(testName, function, args, startTime, "", "SetBucket did not fail for invalid conditions", err)
-		return
-	}
-	if err := policy.SetKey(""); err == nil {
-		logError(testName, function, args, startTime, "", "SetKey did not fail for invalid conditions", err)
-		return
-	}
-	if err := policy.SetExpires(time.Date(1, time.January, 1, 0, 0, 0, 0, time.UTC)); err == nil {
-		logError(testName, function, args, startTime, "", "SetExpires did not fail for invalid conditions", err)
-		return
-	}
-	if err := policy.SetContentType(""); err == nil {
-		logError(testName, function, args, startTime, "", "SetContentType did not fail for invalid conditions", err)
-		return
-	}
-	if err := policy.SetContentLengthRange(1024*1024, 1024); err == nil {
-		logError(testName, function, args, startTime, "", "SetContentLengthRange did not fail for invalid conditions", err)
-		return
-	}
-	if err := policy.SetUserMetadata("", ""); err == nil {
-		logError(testName, function, args, startTime, "", "SetUserMetadata did not fail for invalid conditions", err)
-		return
-	}
-
 	policy.SetBucket(bucketName)
 	policy.SetKey(objectName)
 	policy.SetExpires(time.Now().UTC().AddDate(0, 0, 10)) // expires in 10 days
 	policy.SetContentType("binary/octet-stream")
 	policy.SetContentLengthRange(10, 1024*1024)
 	policy.SetUserMetadata(metadataKey, metadataValue)
+	policy.SetContentEncoding("gzip")
 
 	// Add CRC32C
 	checksum := minio.ChecksumCRC32C.ChecksumBytes(buf)
-	policy.SetChecksum(checksum)
+	err = policy.SetChecksum(checksum)
+	if err != nil {
+		logError(testName, function, args, startTime, "", "SetChecksum failed", err)
+		return
+	}
 
 	args["policy"] = policy.String()
 
@@ -5460,18 +5277,12 @@ func testPresignedPostPolicy() {
 	}
 	writer.Close()
 
-	transport, err := minio.DefaultTransport(mustParseBool(os.Getenv(enableHTTPS)))
-	if err != nil {
-		logError(testName, function, args, startTime, "", "DefaultTransport failed", err)
-		return
-	}
-
 	httpClient := &http.Client{
 		// Setting a sensible time out of 30secs to wait for response
 		// headers. Request is pro-actively canceled after 30secs
 		// with no response.
 		Timeout:   30 * time.Second,
-		Transport: transport,
+		Transport: createHTTPTransport(),
 	}
 	args["url"] = presignedPostPolicyURL.String()
 
@@ -5506,7 +5317,7 @@ func testPresignedPostPolicy() {
 	expectedLocation := scheme + os.Getenv(serverEndpoint) + "/" + bucketName + "/" + objectName
 	expectedLocationBucketDNS := scheme + bucketName + "." + os.Getenv(serverEndpoint) + "/" + objectName
 
-	if !strings.Contains(expectedLocation, "s3.amazonaws.com/") {
+	if !strings.Contains(expectedLocation, ".amazonaws.com/") {
 		// Test when not against AWS S3.
 		if val, ok := res.Header["Location"]; ok {
 			if val[0] != expectedLocation && val[0] != expectedLocationBucketDNS {
@@ -5518,9 +5329,184 @@ func testPresignedPostPolicy() {
 			return
 		}
 	}
-	want := checksum.Encoded()
-	if got := res.Header.Get("X-Amz-Checksum-Crc32c"); got != want {
-		logError(testName, function, args, startTime, "", fmt.Sprintf("Want checksum %q, got %q", want, got), nil)
+	wantChecksumCrc32c := checksum.Encoded()
+	if got := res.Header.Get("X-Amz-Checksum-Crc32c"); got != wantChecksumCrc32c {
+		logError(testName, function, args, startTime, "", fmt.Sprintf("Want checksum %q, got %q", wantChecksumCrc32c, got), nil)
+		return
+	}
+
+	// Ensure that when we subsequently GetObject, the checksum is returned
+	gopts := minio.GetObjectOptions{Checksum: true}
+	r, err := c.GetObject(context.Background(), bucketName, objectName, gopts)
+	if err != nil {
+		logError(testName, function, args, startTime, "", "GetObject failed", err)
+		return
+	}
+	st, err := r.Stat()
+	if err != nil {
+		logError(testName, function, args, startTime, "", "Stat failed", err)
+		return
+	}
+	if st.ChecksumCRC32C != wantChecksumCrc32c {
+		logError(testName, function, args, startTime, "", fmt.Sprintf("Want checksum %s, got %s", wantChecksumCrc32c, st.ChecksumCRC32C), nil)
+		return
+	}
+
+	logSuccess(testName, function, args, startTime)
+}
+
+// testPresignedPostPolicyWrongFile tests that when we have a policy with a checksum, we cannot POST the wrong file
+func testPresignedPostPolicyWrongFile() {
+	// initialize logging params
+	startTime := time.Now()
+	testName := getFuncName()
+	function := "PresignedPostPolicy(policy)"
+	args := map[string]interface{}{
+		"policy": "",
+	}
+
+	c, err := NewClient(ClientConfig{})
+	if err != nil {
+		logError(testName, function, args, startTime, "", "MinIO client object creation failed", err)
+		return
+	}
+
+	// Generate a new random bucket name.
+	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
+
+	// Make a new bucket in 'us-east-1' (source bucket).
+	err = c.MakeBucket(context.Background(), bucketName, minio.MakeBucketOptions{Region: "us-east-1"})
+	if err != nil {
+		logError(testName, function, args, startTime, "", "MakeBucket failed", err)
+		return
+	}
+
+	defer cleanupBucket(bucketName, c)
+
+	objectName := randString(60, rand.NewSource(time.Now().UnixNano()), "")
+	// Azure requires the key to not start with a number
+	metadataKey := randString(60, rand.NewSource(time.Now().UnixNano()), "user")
+	metadataValue := randString(60, rand.NewSource(time.Now().UnixNano()), "")
+
+	policy := minio.NewPostPolicy()
+	policy.SetBucket(bucketName)
+	policy.SetKey(objectName)
+	policy.SetExpires(time.Now().UTC().AddDate(0, 0, 10)) // expires in 10 days
+	policy.SetContentType("binary/octet-stream")
+	policy.SetContentLengthRange(10, 1024*1024)
+	policy.SetUserMetadata(metadataKey, metadataValue)
+
+	// Add CRC32C of some data that the policy will explicitly allow.
+	checksum := minio.ChecksumCRC32C.ChecksumBytes([]byte{0x01, 0x02, 0x03})
+	err = policy.SetChecksum(checksum)
+	if err != nil {
+		logError(testName, function, args, startTime, "", "SetChecksum failed", err)
+		return
+	}
+
+	args["policy"] = policy.String()
+
+	presignedPostPolicyURL, formData, err := c.PresignedPostPolicy(context.Background(), policy)
+	if err != nil {
+		logError(testName, function, args, startTime, "", "PresignedPostPolicy failed", err)
+		return
+	}
+
+	// At this stage, we have a policy that allows us to upload for a specific checksum.
+	// Test that uploading datafile-10-kB, with a different checksum, fails as expected
+	filePath := getMintDataDirFilePath("datafile-10-kB")
+	if filePath == "" {
+		// Make a temp file with 10 KB data.
+		file, err := os.CreateTemp(os.TempDir(), "PresignedPostPolicyTest")
+		if err != nil {
+			logError(testName, function, args, startTime, "", "TempFile creation failed", err)
+			return
+		}
+		if _, err = io.Copy(file, getDataReader("datafile-10-kB")); err != nil {
+			logError(testName, function, args, startTime, "", "Copy failed", err)
+			return
+		}
+		if err = file.Close(); err != nil {
+			logError(testName, function, args, startTime, "", "File Close failed", err)
+			return
+		}
+		filePath = file.Name()
+	}
+	fileReader := getDataReader("datafile-10-kB")
+	defer fileReader.Close()
+	buf10k, err := io.ReadAll(fileReader)
+	if err != nil {
+		logError(testName, function, args, startTime, "", "ReadAll failed", err)
+		return
+	}
+	otherChecksum := minio.ChecksumCRC32C.ChecksumBytes(buf10k)
+
+	var formBuf bytes.Buffer
+	writer := multipart.NewWriter(&formBuf)
+	for k, v := range formData {
+		if k == "x-amz-checksum-crc32c" {
+			v = otherChecksum.Encoded()
+		}
+		writer.WriteField(k, v)
+	}
+
+	// Add file to post request
+	f, err := os.Open(filePath)
+	defer f.Close()
+	if err != nil {
+		logError(testName, function, args, startTime, "", "File open failed", err)
+		return
+	}
+	w, err := writer.CreateFormFile("file", filePath)
+	if err != nil {
+		logError(testName, function, args, startTime, "", "CreateFormFile failed", err)
+		return
+	}
+	_, err = io.Copy(w, f)
+	if err != nil {
+		logError(testName, function, args, startTime, "", "Copy failed", err)
+		return
+	}
+	writer.Close()
+
+	httpClient := &http.Client{
+		Timeout:   30 * time.Second,
+		Transport: createHTTPTransport(),
+	}
+	args["url"] = presignedPostPolicyURL.String()
+
+	req, err := http.NewRequest(http.MethodPost, presignedPostPolicyURL.String(), bytes.NewReader(formBuf.Bytes()))
+	if err != nil {
+		logError(testName, function, args, startTime, "", "HTTP request failed", err)
+		return
+	}
+
+	req.Header.Set("Content-Type", writer.FormDataContentType())
+
+	// Make the POST request with the form data.
+	res, err := httpClient.Do(req)
+	if err != nil {
+		logError(testName, function, args, startTime, "", "HTTP request failed", err)
+		return
+	}
+	defer res.Body.Close()
+	if res.StatusCode != http.StatusForbidden {
+		logError(testName, function, args, startTime, "", "HTTP request unexpected status", errors.New(res.Status))
+		return
+	}
+
+	// Read the response body, ensure it has checksum failure message
+	resBody, err := io.ReadAll(res.Body)
+	if err != nil {
+		logError(testName, function, args, startTime, "", "ReadAll failed", err)
+		return
+	}
+
+	// Normalize the response body, because S3 uses quotes around the policy condition components
+	// in the error message, MinIO does not.
+	resBodyStr := strings.ReplaceAll(string(resBody), `"`, "")
+	if !strings.Contains(resBodyStr, "Policy Condition failed: [eq, $x-amz-checksum-crc32c, 8TDyHg=") {
+		logError(testName, function, args, startTime, "", "Unexpected response body", errors.New(resBodyStr))
 		return
 	}
 
@@ -5535,27 +5521,12 @@ func testCopyObject() {
 	function := "CopyObject(dst, src)"
 	args := map[string]interface{}{}
 
-	// Seed random based on current time.
-	rand.Seed(time.Now().Unix())
-
-	// Instantiate new minio client object
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO client object creation failed", err)
 		return
 	}
 
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 
@@ -5730,27 +5701,12 @@ func testSSECEncryptedGetObjectReadSeekFunctional() {
 	function := "GetObject(bucketName, objectName)"
 	args := map[string]interface{}{}
 
-	// Seed random based on current time.
-	rand.Seed(time.Now().Unix())
-
-	// Instantiate new minio client object.
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO client object creation failed", err)
 		return
 	}
 
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 	args["bucketName"] = bucketName
@@ -5913,27 +5869,12 @@ func testSSES3EncryptedGetObjectReadSeekFunctional() {
 	function := "GetObject(bucketName, objectName)"
 	args := map[string]interface{}{}
 
-	// Seed random based on current time.
-	rand.Seed(time.Now().Unix())
-
-	// Instantiate new minio client object.
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO client object creation failed", err)
 		return
 	}
 
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 	args["bucketName"] = bucketName
@@ -6094,27 +6035,12 @@ func testSSECEncryptedGetObjectReadAtFunctional() {
 	function := "GetObject(bucketName, objectName)"
 	args := map[string]interface{}{}
 
-	// Seed random based on current time.
-	rand.Seed(time.Now().Unix())
-
-	// Instantiate new minio client object.
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO client object creation failed", err)
 		return
 	}
 
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 	args["bucketName"] = bucketName
@@ -6278,27 +6204,12 @@ func testSSES3EncryptedGetObjectReadAtFunctional() {
 	function := "GetObject(bucketName, objectName)"
 	args := map[string]interface{}{}
 
-	// Seed random based on current time.
-	rand.Seed(time.Now().Unix())
-
-	// Instantiate new minio client object.
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO client object creation failed", err)
 		return
 	}
 
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 	args["bucketName"] = bucketName
@@ -6463,27 +6374,13 @@ func testSSECEncryptionPutGet() {
 		"objectName": "",
 		"sse":        "",
 	}
-	// Seed random based on current time.
-	rand.Seed(time.Now().Unix())
 
-	// Instantiate new minio client object
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO client object creation failed", err)
 		return
 	}
 
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 	args["bucketName"] = bucketName
@@ -6573,27 +6470,13 @@ func testSSECEncryptionFPut() {
 		"contentType": "",
 		"sse":         "",
 	}
-	// Seed random based on current time.
-	rand.Seed(time.Now().Unix())
 
-	// Instantiate new minio client object
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO client object creation failed", err)
 		return
 	}
 
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 	args["bucketName"] = bucketName
@@ -6696,27 +6579,13 @@ func testSSES3EncryptionPutGet() {
 		"objectName": "",
 		"sse":        "",
 	}
-	// Seed random based on current time.
-	rand.Seed(time.Now().Unix())
 
-	// Instantiate new minio client object
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO client object creation failed", err)
 		return
 	}
 
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 	args["bucketName"] = bucketName
@@ -6804,27 +6673,13 @@ func testSSES3EncryptionFPut() {
 		"contentType": "",
 		"sse":         "",
 	}
-	// Seed random based on current time.
-	rand.Seed(time.Now().Unix())
 
-	// Instantiate new minio client object
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO client object creation failed", err)
 		return
 	}
 
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 	args["bucketName"] = bucketName
@@ -6933,26 +6788,12 @@ func testBucketNotification() {
 		return
 	}
 
-	// Seed random based on current time.
-	rand.Seed(time.Now().Unix())
-
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO client object creation failed", err)
 		return
 	}
 
-	// Enable to debug
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
 	bucketName := os.Getenv("NOTIFY_BUCKET")
 	args["bucketName"] = bucketName
 
@@ -7028,26 +6869,12 @@ func testFunctional() {
 	functionAll := ""
 	args := map[string]interface{}{}
 
-	// Seed random based on current time.
-	rand.Seed(time.Now().Unix())
-
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, nil, startTime, "", "MinIO client object creation failed", err)
 		return
 	}
 
-	// Enable to debug
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 
@@ -7088,7 +6915,6 @@ func testFunctional() {
 		"bucketName": bucketName,
 	}
 	exists, err = c.BucketExists(context.Background(), bucketName)
-
 	if err != nil {
 		logError(testName, function, args, startTime, "", "BucketExists failed", err)
 		return
@@ -7151,7 +6977,6 @@ func testFunctional() {
 		"bucketPolicy": writeOnlyPolicy,
 	}
 	err = c.SetBucketPolicy(context.Background(), bucketName, writeOnlyPolicy)
-
 	if err != nil {
 		logError(testName, function, args, startTime, "", "SetBucketPolicy failed", err)
 		return
@@ -7180,7 +7005,6 @@ func testFunctional() {
 		"bucketPolicy": readWritePolicy,
 	}
 	err = c.SetBucketPolicy(context.Background(), bucketName, readWritePolicy)
-
 	if err != nil {
 		logError(testName, function, args, startTime, "", "SetBucketPolicy failed", err)
 		return
@@ -7357,7 +7181,6 @@ func testFunctional() {
 		"fileName":   fileName + "-f",
 	}
 	err = c.FGetObject(context.Background(), bucketName, objectName, fileName+"-f", minio.GetObjectOptions{})
-
 	if err != nil {
 		logError(testName, function, args, startTime, "", "FGetObject failed", err)
 		return
@@ -7389,7 +7212,7 @@ func testFunctional() {
 		return
 	}
 
-	transport, err := minio.DefaultTransport(mustParseBool(os.Getenv(enableHTTPS)))
+	transport := createHTTPTransport()
 	if err != nil {
 		logError(testName, function, args, startTime, "", "DefaultTransport failed", err)
 		return
@@ -7489,7 +7312,6 @@ func testFunctional() {
 		"reqParams":  reqParams,
 	}
 	presignedGetURL, err = c.PresignedGetObject(context.Background(), bucketName, objectName, 3600*time.Second, reqParams)
-
 	if err != nil {
 		logError(testName, function, args, startTime, "", "PresignedGetObject failed", err)
 		return
@@ -7646,14 +7468,12 @@ func testFunctional() {
 		"objectName": objectName,
 	}
 	err = c.RemoveObject(context.Background(), bucketName, objectName, minio.RemoveObjectOptions{})
-
 	if err != nil {
 		logError(testName, function, args, startTime, "", "RemoveObject failed", err)
 		return
 	}
 	args["objectName"] = objectName + "-f"
 	err = c.RemoveObject(context.Background(), bucketName, objectName+"-f", minio.RemoveObjectOptions{})
-
 	if err != nil {
 		logError(testName, function, args, startTime, "", "RemoveObject failed", err)
 		return
@@ -7661,7 +7481,6 @@ func testFunctional() {
 
 	args["objectName"] = objectName + "-nolength"
 	err = c.RemoveObject(context.Background(), bucketName, objectName+"-nolength", minio.RemoveObjectOptions{})
-
 	if err != nil {
 		logError(testName, function, args, startTime, "", "RemoveObject failed", err)
 		return
@@ -7669,7 +7488,6 @@ func testFunctional() {
 
 	args["objectName"] = objectName + "-presigned"
 	err = c.RemoveObject(context.Background(), bucketName, objectName+"-presigned", minio.RemoveObjectOptions{})
-
 	if err != nil {
 		logError(testName, function, args, startTime, "", "RemoveObject failed", err)
 		return
@@ -7677,7 +7495,6 @@ func testFunctional() {
 
 	args["objectName"] = objectName + "-presign-custom"
 	err = c.RemoveObject(context.Background(), bucketName, objectName+"-presign-custom", minio.RemoveObjectOptions{})
-
 	if err != nil {
 		logError(testName, function, args, startTime, "", "RemoveObject failed", err)
 		return
@@ -7689,7 +7506,6 @@ func testFunctional() {
 		"bucketName": bucketName,
 	}
 	err = c.RemoveBucket(context.Background(), bucketName)
-
 	if err != nil {
 		logError(testName, function, args, startTime, "", "RemoveBucket failed", err)
 		return
@@ -7718,24 +7534,12 @@ func testGetObjectModified() {
 	function := "GetObject(bucketName, objectName)"
 	args := map[string]interface{}{}
 
-	// Instantiate new minio client object.
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO client object creation failed", err)
 		return
 	}
 
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
 	// Make a new bucket.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 	args["bucketName"] = bucketName
@@ -7814,24 +7618,12 @@ func testPutObjectUploadSeekedObject() {
 		"contentType":  "binary/octet-stream",
 	}
 
-	// Instantiate new minio client object.
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO client object creation failed", err)
 		return
 	}
 
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
 	// Make a new bucket.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 	args["bucketName"] = bucketName
@@ -7934,27 +7726,12 @@ func testMakeBucketErrorV2() {
 		"region":     "eu-west-1",
 	}
 
-	// Seed random based on current time.
-	rand.Seed(time.Now().Unix())
-
-	// Instantiate new minio client object.
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV2(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{CredsV2: true})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err)
 		return
 	}
 
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 	region := "eu-west-1"
@@ -7994,27 +7771,12 @@ func testGetObjectClosedTwiceV2() {
 		"region":     "eu-west-1",
 	}
 
-	// Seed random based on current time.
-	rand.Seed(time.Now().Unix())
-
-	// Instantiate new minio client object.
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV2(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{CredsV2: true})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err)
 		return
 	}
 
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 	args["bucketName"] = bucketName
@@ -8085,27 +7847,12 @@ func testFPutObjectV2() {
 		"opts":       "",
 	}
 
-	// Seed random based on current time.
-	rand.Seed(time.Now().Unix())
-
-	// Instantiate new minio client object.
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV2(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{CredsV2: true})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err)
 		return
 	}
 
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 	args["bucketName"] = bucketName
@@ -8246,27 +7993,12 @@ func testMakeBucketRegionsV2() {
 		"region":     "eu-west-1",
 	}
 
-	// Seed random based on current time.
-	rand.Seed(time.Now().Unix())
-
-	// Instantiate new minio client object.
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV2(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{CredsV2: true})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err)
 		return
 	}
 
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 	args["bucketName"] = bucketName
@@ -8309,27 +8041,12 @@ func testGetObjectReadSeekFunctionalV2() {
 	function := "GetObject(bucketName, objectName)"
 	args := map[string]interface{}{}
 
-	// Seed random based on current time.
-	rand.Seed(time.Now().Unix())
-
-	// Instantiate new minio client object.
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV2(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{CredsV2: true})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err)
 		return
 	}
 
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 	args["bucketName"] = bucketName
@@ -8464,27 +8181,12 @@ func testGetObjectReadAtFunctionalV2() {
 	function := "GetObject(bucketName, objectName)"
 	args := map[string]interface{}{}
 
-	// Seed random based on current time.
-	rand.Seed(time.Now().Unix())
-
-	// Instantiate new minio client object.
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV2(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{CredsV2: true})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err)
 		return
 	}
 
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 	args["bucketName"] = bucketName
@@ -8626,27 +8328,12 @@ func testCopyObjectV2() {
 	function := "CopyObject(destination, source)"
 	args := map[string]interface{}{}
 
-	// Seed random based on current time.
-	rand.Seed(time.Now().Unix())
-
-	// Instantiate new minio client object
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV2(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{CredsV2: true})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err)
 		return
 	}
 
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 
@@ -8845,13 +8532,7 @@ func testComposeObjectErrorCasesV2() {
 	function := "ComposeObject(destination, sourceList)"
 	args := map[string]interface{}{}
 
-	// Instantiate new minio client object
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV2(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{CredsV2: true})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err)
 		return
@@ -8943,13 +8624,7 @@ func testCompose10KSourcesV2() {
 	function := "ComposeObject(destination, sourceList)"
 	args := map[string]interface{}{}
 
-	// Instantiate new minio client object
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV2(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{CredsV2: true})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err)
 		return
@@ -8965,13 +8640,7 @@ func testEncryptedEmptyObject() {
 	function := "PutObject(bucketName, objectName, reader, objectSize, opts)"
 	args := map[string]interface{}{}
 
-	// Instantiate new minio client object
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO v4 client object creation failed", err)
 		return
@@ -9119,7 +8788,7 @@ func testEncryptedCopyObjectWrapper(c *minio.Client, bucketName string, sseSrc,
 		dstEncryption = sseDst
 	}
 	// 3. get copied object and check if content is equal
-	coreClient := minio.Core{c}
+	coreClient := minio.Core{Client: c}
 	reader, _, _, err := coreClient.GetObject(context.Background(), bucketName, "dstObject", minio.GetObjectOptions{ServerSideEncryption: dstEncryption})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "GetObject failed", err)
@@ -9226,13 +8895,7 @@ func testUnencryptedToSSECCopyObject() {
 	function := "CopyObject(destination, source)"
 	args := map[string]interface{}{}
 
-	// Instantiate new minio client object
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err)
 		return
@@ -9241,7 +8904,6 @@ func testUnencryptedToSSECCopyObject() {
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 
 	sseDst := encrypt.DefaultPBKDF([]byte("correct horse battery staple"), []byte(bucketName+"dstObject"))
-	// c.TraceOn(os.Stderr)
 	testEncryptedCopyObjectWrapper(c, bucketName, nil, sseDst)
 }
 
@@ -9253,13 +8915,7 @@ func testUnencryptedToSSES3CopyObject() {
 	function := "CopyObject(destination, source)"
 	args := map[string]interface{}{}
 
-	// Instantiate new minio client object
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err)
 		return
@@ -9269,7 +8925,6 @@ func testUnencryptedToSSES3CopyObject() {
 
 	var sseSrc encrypt.ServerSide
 	sseDst := encrypt.NewSSE()
-	// c.TraceOn(os.Stderr)
 	testEncryptedCopyObjectWrapper(c, bucketName, sseSrc, sseDst)
 }
 
@@ -9281,13 +8936,7 @@ func testUnencryptedToUnencryptedCopyObject() {
 	function := "CopyObject(destination, source)"
 	args := map[string]interface{}{}
 
-	// Instantiate new minio client object
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err)
 		return
@@ -9296,7 +8945,6 @@ func testUnencryptedToUnencryptedCopyObject() {
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 
 	var sseSrc, sseDst encrypt.ServerSide
-	// c.TraceOn(os.Stderr)
 	testEncryptedCopyObjectWrapper(c, bucketName, sseSrc, sseDst)
 }
 
@@ -9308,13 +8956,7 @@ func testEncryptedSSECToSSECCopyObject() {
 	function := "CopyObject(destination, source)"
 	args := map[string]interface{}{}
 
-	// Instantiate new minio client object
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err)
 		return
@@ -9324,7 +8966,6 @@ func testEncryptedSSECToSSECCopyObject() {
 
 	sseSrc := encrypt.DefaultPBKDF([]byte("correct horse battery staple"), []byte(bucketName+"srcObject"))
 	sseDst := encrypt.DefaultPBKDF([]byte("correct horse battery staple"), []byte(bucketName+"dstObject"))
-	// c.TraceOn(os.Stderr)
 	testEncryptedCopyObjectWrapper(c, bucketName, sseSrc, sseDst)
 }
 
@@ -9336,13 +8977,7 @@ func testEncryptedSSECToSSES3CopyObject() {
 	function := "CopyObject(destination, source)"
 	args := map[string]interface{}{}
 
-	// Instantiate new minio client object
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err)
 		return
@@ -9352,7 +8987,6 @@ func testEncryptedSSECToSSES3CopyObject() {
 
 	sseSrc := encrypt.DefaultPBKDF([]byte("correct horse battery staple"), []byte(bucketName+"srcObject"))
 	sseDst := encrypt.NewSSE()
-	// c.TraceOn(os.Stderr)
 	testEncryptedCopyObjectWrapper(c, bucketName, sseSrc, sseDst)
 }
 
@@ -9364,13 +8998,7 @@ func testEncryptedSSECToUnencryptedCopyObject() {
 	function := "CopyObject(destination, source)"
 	args := map[string]interface{}{}
 
-	// Instantiate new minio client object
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err)
 		return
@@ -9380,7 +9008,6 @@ func testEncryptedSSECToUnencryptedCopyObject() {
 
 	sseSrc := encrypt.DefaultPBKDF([]byte("correct horse battery staple"), []byte(bucketName+"srcObject"))
 	var sseDst encrypt.ServerSide
-	// c.TraceOn(os.Stderr)
 	testEncryptedCopyObjectWrapper(c, bucketName, sseSrc, sseDst)
 }
 
@@ -9392,13 +9019,7 @@ func testEncryptedSSES3ToSSECCopyObject() {
 	function := "CopyObject(destination, source)"
 	args := map[string]interface{}{}
 
-	// Instantiate new minio client object
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err)
 		return
@@ -9408,7 +9029,6 @@ func testEncryptedSSES3ToSSECCopyObject() {
 
 	sseSrc := encrypt.NewSSE()
 	sseDst := encrypt.DefaultPBKDF([]byte("correct horse battery staple"), []byte(bucketName+"dstObject"))
-	// c.TraceOn(os.Stderr)
 	testEncryptedCopyObjectWrapper(c, bucketName, sseSrc, sseDst)
 }
 
@@ -9420,13 +9040,7 @@ func testEncryptedSSES3ToSSES3CopyObject() {
 	function := "CopyObject(destination, source)"
 	args := map[string]interface{}{}
 
-	// Instantiate new minio client object
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err)
 		return
@@ -9436,7 +9050,6 @@ func testEncryptedSSES3ToSSES3CopyObject() {
 
 	sseSrc := encrypt.NewSSE()
 	sseDst := encrypt.NewSSE()
-	// c.TraceOn(os.Stderr)
 	testEncryptedCopyObjectWrapper(c, bucketName, sseSrc, sseDst)
 }
 
@@ -9448,13 +9061,7 @@ func testEncryptedSSES3ToUnencryptedCopyObject() {
 	function := "CopyObject(destination, source)"
 	args := map[string]interface{}{}
 
-	// Instantiate new minio client object
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err)
 		return
@@ -9464,7 +9071,6 @@ func testEncryptedSSES3ToUnencryptedCopyObject() {
 
 	sseSrc := encrypt.NewSSE()
 	var sseDst encrypt.ServerSide
-	// c.TraceOn(os.Stderr)
 	testEncryptedCopyObjectWrapper(c, bucketName, sseSrc, sseDst)
 }
 
@@ -9476,13 +9082,7 @@ func testEncryptedCopyObjectV2() {
 	function := "CopyObject(destination, source)"
 	args := map[string]interface{}{}
 
-	// Instantiate new minio client object
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV2(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{CredsV2: true})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err)
 		return
@@ -9492,7 +9092,6 @@ func testEncryptedCopyObjectV2() {
 
 	sseSrc := encrypt.DefaultPBKDF([]byte("correct horse battery staple"), []byte(bucketName+"srcObject"))
 	sseDst := encrypt.DefaultPBKDF([]byte("correct horse battery staple"), []byte(bucketName+"dstObject"))
-	// c.TraceOn(os.Stderr)
 	testEncryptedCopyObjectWrapper(c, bucketName, sseSrc, sseDst)
 }
 
@@ -9503,13 +9102,7 @@ func testDecryptedCopyObject() {
 	function := "CopyObject(destination, source)"
 	args := map[string]interface{}{}
 
-	// Instantiate new minio client object
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err)
 		return
@@ -9563,26 +9156,14 @@ func testSSECMultipartEncryptedToSSECCopyObjectPart() {
 	function := "CopyObjectPart(destination, source)"
 	args := map[string]interface{}{}
 
-	// Instantiate new minio client object
-	client, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	client, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO v4 client object creation failed", err)
 		return
 	}
 
 	// Instantiate new core client object.
-	c := minio.Core{client}
-
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
+	c := minio.Core{Client: client}
 
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test")
@@ -9761,26 +9342,14 @@ func testSSECEncryptedToSSECCopyObjectPart() {
 	function := "CopyObjectPart(destination, source)"
 	args := map[string]interface{}{}
 
-	// Instantiate new minio client object
-	client, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	client, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO v4 client object creation failed", err)
 		return
 	}
 
 	// Instantiate new core client object.
-	c := minio.Core{client}
-
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
+	c := minio.Core{Client: client}
 
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test")
@@ -9939,26 +9508,14 @@ func testSSECEncryptedToUnencryptedCopyPart() {
 	function := "CopyObjectPart(destination, source)"
 	args := map[string]interface{}{}
 
-	// Instantiate new minio client object
-	client, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	client, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO v4 client object creation failed", err)
 		return
 	}
 
 	// Instantiate new core client object.
-	c := minio.Core{client}
-
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
+	c := minio.Core{Client: client}
 
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test")
@@ -10116,26 +9673,14 @@ func testSSECEncryptedToSSES3CopyObjectPart() {
 	function := "CopyObjectPart(destination, source)"
 	args := map[string]interface{}{}
 
-	// Instantiate new minio client object
-	client, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	client, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO v4 client object creation failed", err)
 		return
 	}
 
 	// Instantiate new core client object.
-	c := minio.Core{client}
-
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
+	c := minio.Core{Client: client}
 
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test")
@@ -10296,26 +9841,14 @@ func testUnencryptedToSSECCopyObjectPart() {
 	function := "CopyObjectPart(destination, source)"
 	args := map[string]interface{}{}
 
-	// Instantiate new minio client object
-	client, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	client, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO v4 client object creation failed", err)
 		return
 	}
 
 	// Instantiate new core client object.
-	c := minio.Core{client}
-
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
+	c := minio.Core{Client: client}
 
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test")
@@ -10471,26 +10004,14 @@ func testUnencryptedToUnencryptedCopyPart() {
 	function := "CopyObjectPart(destination, source)"
 	args := map[string]interface{}{}
 
-	// Instantiate new minio client object
-	client, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	client, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO v4 client object creation failed", err)
 		return
 	}
 
 	// Instantiate new core client object.
-	c := minio.Core{client}
-
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
+	c := minio.Core{Client: client}
 
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test")
@@ -10642,26 +10163,14 @@ func testUnencryptedToSSES3CopyObjectPart() {
 	function := "CopyObjectPart(destination, source)"
 	args := map[string]interface{}{}
 
-	// Instantiate new minio client object
-	client, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	client, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO v4 client object creation failed", err)
 		return
 	}
 
 	// Instantiate new core client object.
-	c := minio.Core{client}
-
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
+	c := minio.Core{Client: client}
 
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test")
@@ -10815,26 +10324,14 @@ func testSSES3EncryptedToSSECCopyObjectPart() {
 	function := "CopyObjectPart(destination, source)"
 	args := map[string]interface{}{}
 
-	// Instantiate new minio client object
-	client, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	client, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO v4 client object creation failed", err)
 		return
 	}
 
 	// Instantiate new core client object.
-	c := minio.Core{client}
-
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
+	c := minio.Core{Client: client}
 
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test")
@@ -10991,26 +10488,14 @@ func testSSES3EncryptedToUnencryptedCopyPart() {
 	function := "CopyObjectPart(destination, source)"
 	args := map[string]interface{}{}
 
-	// Instantiate new minio client object
-	client, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	client, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO v4 client object creation failed", err)
 		return
 	}
 
 	// Instantiate new core client object.
-	c := minio.Core{client}
-
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
+	c := minio.Core{Client: client}
 
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test")
@@ -11163,26 +10648,14 @@ func testSSES3EncryptedToSSES3CopyObjectPart() {
 	function := "CopyObjectPart(destination, source)"
 	args := map[string]interface{}{}
 
-	// Instantiate new minio client object
-	client, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	client, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO v4 client object creation failed", err)
 		return
 	}
 
 	// Instantiate new core client object.
-	c := minio.Core{client}
-
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
+	c := minio.Core{Client: client}
 
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test")
@@ -11337,19 +10810,12 @@ func testUserMetadataCopying() {
 	function := "CopyObject(destination, source)"
 	args := map[string]interface{}{}
 
-	// Instantiate new minio client object
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO client object creation failed", err)
 		return
 	}
 
-	// c.TraceOn(os.Stderr)
 	testUserMetadataCopyingWrapper(c)
 }
 
@@ -11514,19 +10980,12 @@ func testUserMetadataCopyingV2() {
 	function := "CopyObject(destination, source)"
 	args := map[string]interface{}{}
 
-	// Instantiate new minio client object
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV2(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{CredsV2: true})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO client v2 object creation failed", err)
 		return
 	}
 
-	// c.TraceOn(os.Stderr)
 	testUserMetadataCopyingWrapper(c)
 }
 
@@ -11537,13 +10996,7 @@ func testStorageClassMetadataPutObject() {
 	args := map[string]interface{}{}
 	testName := getFuncName()
 
-	// Instantiate new minio client object
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO v4 client object creation failed", err)
 		return
@@ -11625,13 +11078,7 @@ func testStorageClassInvalidMetadataPutObject() {
 	args := map[string]interface{}{}
 	testName := getFuncName()
 
-	// Instantiate new minio client object
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO v4 client object creation failed", err)
 		return
@@ -11668,13 +11115,7 @@ func testStorageClassMetadataCopyObject() {
 	args := map[string]interface{}{}
 	testName := getFuncName()
 
-	// Instantiate new minio client object
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-			Transport: createHTTPTransport(),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO v4 client object creation failed", err)
 		return
@@ -11795,27 +11236,12 @@ func testPutObjectNoLengthV2() {
 		"opts":       "",
 	}
 
-	// Seed random based on current time.
-	rand.Seed(time.Now().Unix())
-
-	// Instantiate new minio client object.
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV2(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{CredsV2: true})
 	if err != nil {
-		logError(testName, function, args, startTime, "", "MinIO client v2 object creation failed", err)
+		logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err)
 		return
 	}
 
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 	args["bucketName"] = bucketName
@@ -11871,27 +11297,12 @@ func testPutObjectsUnknownV2() {
 		"opts":       "",
 	}
 
-	// Seed random based on current time.
-	rand.Seed(time.Now().Unix())
-
-	// Instantiate new minio client object.
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV2(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{CredsV2: true})
 	if err != nil {
-		logError(testName, function, args, startTime, "", "MinIO client v2 object creation failed", err)
+		logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err)
 		return
 	}
 
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 	args["bucketName"] = bucketName
@@ -11962,27 +11373,12 @@ func testPutObject0ByteV2() {
 		"opts":       "",
 	}
 
-	// Seed random based on current time.
-	rand.Seed(time.Now().Unix())
-
-	// Instantiate new minio client object.
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV2(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{CredsV2: true})
 	if err != nil {
-		logError(testName, function, args, startTime, "", "MinIO client v2 object creation failed", err)
+		logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err)
 		return
 	}
 
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 	args["bucketName"] = bucketName
@@ -12027,13 +11423,7 @@ func testComposeObjectErrorCases() {
 	function := "ComposeObject(destination, sourceList)"
 	args := map[string]interface{}{}
 
-	// Instantiate new minio client object
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO client object creation failed", err)
 		return
@@ -12050,13 +11440,7 @@ func testCompose10KSources() {
 	function := "ComposeObject(destination, sourceList)"
 	args := map[string]interface{}{}
 
-	// Instantiate new minio client object
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO client object creation failed", err)
 		return
@@ -12074,26 +11458,12 @@ func testFunctionalV2() {
 	functionAll := ""
 	args := map[string]interface{}{}
 
-	// Seed random based on current time.
-	rand.Seed(time.Now().Unix())
-
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV2(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-			Transport: createHTTPTransport(),
-		})
+	c, err := NewClient(ClientConfig{CredsV2: true})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO client v2 object creation failed", err)
 		return
 	}
 
-	// Enable to debug
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 	location := "us-east-1"
@@ -12157,7 +11527,6 @@ func testFunctionalV2() {
 		"bucketPolicy": readWritePolicy,
 	}
 	err = c.SetBucketPolicy(context.Background(), bucketName, readWritePolicy)
-
 	if err != nil {
 		logError(testName, function, args, startTime, "", "SetBucketPolicy failed", err)
 		return
@@ -12328,18 +11697,12 @@ func testFunctionalV2() {
 		return
 	}
 
-	transport, err := minio.DefaultTransport(mustParseBool(os.Getenv(enableHTTPS)))
-	if err != nil {
-		logError(testName, function, args, startTime, "", "DefaultTransport failed", err)
-		return
-	}
-
 	httpClient := &http.Client{
 		// Setting a sensible time out of 30secs to wait for response
 		// headers. Request is pro-actively canceled after 30secs
 		// with no response.
 		Timeout:   30 * time.Second,
-		Transport: transport,
+		Transport: createHTTPTransport(),
 	}
 
 	req, err := http.NewRequest(http.MethodHead, presignedHeadURL.String(), nil)
@@ -12534,27 +11897,13 @@ func testGetObjectContext() {
 		"bucketName": "",
 		"objectName": "",
 	}
-	// Seed random based on current time.
-	rand.Seed(time.Now().Unix())
 
-	// Instantiate new minio client object.
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO client v4 object creation failed", err)
 		return
 	}
 
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 	args["bucketName"] = bucketName
@@ -12637,27 +11986,13 @@ func testFGetObjectContext() {
 		"objectName": "",
 		"fileName":   "",
 	}
-	// Seed random based on current time.
-	rand.Seed(time.Now().Unix())
 
-	// Instantiate new minio client object.
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO client v4 object creation failed", err)
 		return
 	}
 
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 	args["bucketName"] = bucketName
@@ -12729,24 +12064,12 @@ func testGetObjectRanges() {
 	defer cancel()
 
 	rng := rand.NewSource(time.Now().UnixNano())
-	// Instantiate new minio client object.
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO client v4 object creation failed", err)
 		return
 	}
 
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
 	// Generate a new random bucket name.
 	bucketName := randString(60, rng, "minio-go-test-")
 	args["bucketName"] = bucketName
@@ -12836,27 +12159,13 @@ func testGetObjectACLContext() {
 		"bucketName": "",
 		"objectName": "",
 	}
-	// Seed random based on current time.
-	rand.Seed(time.Now().Unix())
 
-	// Instantiate new minio client object.
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO client v4 object creation failed", err)
 		return
 	}
 
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 	args["bucketName"] = bucketName
@@ -12888,7 +12197,6 @@ func testGetObjectACLContext() {
 			ContentType:  "binary/octet-stream",
 			UserMetadata: metaData,
 		})
-
 	if err != nil {
 		logError(testName, function, args, startTime, "", "PutObject failed", err)
 		return
@@ -13015,24 +12323,12 @@ func testPutObjectContextV2() {
 		"size":       "",
 		"opts":       "",
 	}
-	// Instantiate new minio client object.
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV2(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{CredsV2: true})
 	if err != nil {
-		logError(testName, function, args, startTime, "", "MinIO client v2 object creation failed", err)
+		logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err)
 		return
 	}
 
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
 	// Make a new bucket.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 	args["bucketName"] = bucketName
@@ -13087,27 +12383,13 @@ func testGetObjectContextV2() {
 		"bucketName": "",
 		"objectName": "",
 	}
-	// Seed random based on current time.
-	rand.Seed(time.Now().Unix())
 
-	// Instantiate new minio client object.
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV2(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{CredsV2: true})
 	if err != nil {
-		logError(testName, function, args, startTime, "", "MinIO client v2 object creation failed", err)
+		logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err)
 		return
 	}
 
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 	args["bucketName"] = bucketName
@@ -13188,27 +12470,13 @@ func testFGetObjectContextV2() {
 		"objectName": "",
 		"fileName":   "",
 	}
-	// Seed random based on current time.
-	rand.Seed(time.Now().Unix())
 
-	// Instantiate new minio client object.
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV2(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{CredsV2: true})
 	if err != nil {
-		logError(testName, function, args, startTime, "", "MinIO client v2 object creation failed", err)
+		logError(testName, function, args, startTime, "", "MinIO v2 client object creation failed", err)
 		return
 	}
 
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 	args["bucketName"] = bucketName
@@ -13277,27 +12545,13 @@ func testListObjects() {
 		"objectPrefix": "",
 		"recursive":    "true",
 	}
-	// Seed random based on current time.
-	rand.Seed(time.Now().Unix())
 
-	// Instantiate new minio client object.
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
 		logError(testName, function, args, startTime, "", "MinIO client v4 object creation failed", err)
 		return
 	}
 
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
 	// Generate a new random bucket name.
 	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
 	args["bucketName"] = bucketName
@@ -13367,91 +12621,891 @@ func testListObjects() {
 	logSuccess(testName, function, args, startTime)
 }
 
-// Test deleting multiple objects with object retention set in Governance mode
-func testRemoveObjects() {
-	// initialize logging params
+// testCors can also be run against S3 itself: set the env var
+// MINIO_GO_TEST_BUCKET_CORS to the name of a public bucket. That bucket WILL BE DELETED
+// when the test is done, so recreate it manually before each run. The minio-go SDK does not
+// support calling SetPublicBucket (put-public-access-block) on S3, otherwise we could script the whole thing.
+func testCors() {
+	ctx := context.Background()
 	startTime := time.Now()
 	testName := getFuncName()
-	function := "RemoveObjects(bucketName, objectsCh, opts)"
+	function := "SetBucketCors(bucketName, cors)"
 	args := map[string]interface{}{
-		"bucketName":   "",
-		"objectPrefix": "",
-		"recursive":    "true",
+		"bucketName": "",
+		"cors":       "",
 	}
-	// Seed random based on current time.
-	rand.Seed(time.Now().Unix())
 
-	// Instantiate new minio client object.
-	c, err := minio.New(os.Getenv(serverEndpoint),
-		&minio.Options{
-			Creds:     credentials.NewStaticV4(os.Getenv(accessKey), os.Getenv(secretKey), ""),
-			Transport: createHTTPTransport(),
-			Secure:    mustParseBool(os.Getenv(enableHTTPS)),
-		})
+	c, err := NewClient(ClientConfig{})
 	if err != nil {
-		logError(testName, function, args, startTime, "", "MinIO client v4 object creation failed", err)
+		logError(testName, function, args, startTime, "", "MinIO client object creation failed", err)
 		return
 	}
 
-	// Enable tracing, write to stderr.
-	// c.TraceOn(os.Stderr)
-
-	// Set user agent.
-	c.SetAppInfo("MinIO-go-FunctionalTest", appVersion)
-
-	// Generate a new random bucket name.
-	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
+	// Create or reuse a bucket that will get cors settings applied to it and deleted when done
+	bucketName := os.Getenv("MINIO_GO_TEST_BUCKET_CORS")
+	if bucketName == "" {
+		bucketName = randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
+		err = c.MakeBucket(ctx, bucketName, minio.MakeBucketOptions{Region: "us-east-1"})
+		if err != nil {
+			logError(testName, function, args, startTime, "", "MakeBucket failed", err)
+			return
+		}
+	}
 	args["bucketName"] = bucketName
-	objectName := randString(60, rand.NewSource(time.Now().UnixNano()), "")
-	args["objectName"] = objectName
+	defer cleanupBucket(bucketName, c)
 
-	// Make a new bucket.
-	err = c.MakeBucket(context.Background(), bucketName, minio.MakeBucketOptions{Region: "us-east-1", ObjectLocking: true})
+	publicPolicy := `{"Version":"2012-10-17","Statement":[{"Effect":"Allow","Principal":{"AWS":["*"]},"Action":["s3:*"],"Resource":["arn:aws:s3:::` + bucketName + `", "arn:aws:s3:::` + bucketName + `/*"]}]}`
+	err = c.SetBucketPolicy(ctx, bucketName, publicPolicy)
 	if err != nil {
-		logError(testName, function, args, startTime, "", "MakeBucket failed", err)
+		logError(testName, function, args, startTime, "", "SetBucketPolicy failed", err)
 		return
 	}
 
-	bufSize := dataFileMap["datafile-129-MB"]
-	reader := getDataReader("datafile-129-MB")
-	defer reader.Close()
+	// Upload an object for testing.
+	objectContents := `some-text-file-contents`
+	reader := strings.NewReader(objectContents)
+	bufSize := int64(len(objectContents))
 
-	_, err = c.PutObject(context.Background(), bucketName, objectName, reader, int64(bufSize), minio.PutObjectOptions{})
+	objectName := randString(60, rand.NewSource(time.Now().UnixNano()), "")
+	args["objectName"] = objectName
+
+	_, err = c.PutObject(ctx, bucketName, objectName, reader, int64(bufSize), minio.PutObjectOptions{ContentType: "binary/octet-stream"})
 	if err != nil {
-		logError(testName, function, args, startTime, "", "Error uploading object", err)
+		logError(testName, function, args, startTime, "", "PutObject call failed", err)
 		return
 	}
+	bucketURL := c.EndpointURL().String() + "/" + bucketName + "/"
+	objectURL := bucketURL + objectName
 
-	// Replace with smaller...
-	bufSize = dataFileMap["datafile-10-kB"]
-	reader = getDataReader("datafile-10-kB")
-	defer reader.Close()
-
-	_, err = c.PutObject(context.Background(), bucketName, objectName, reader, int64(bufSize), minio.PutObjectOptions{})
-	if err != nil {
-		logError(testName, function, args, startTime, "", "Error uploading object", err)
+	httpClient := &http.Client{
+		Timeout:   30 * time.Second,
+		Transport: createHTTPTransport(),
 	}
 
-	t := time.Date(2030, time.April, 25, 14, 0, 0, 0, time.UTC)
-	m := minio.RetentionMode(minio.Governance)
-	opts := minio.PutObjectRetentionOptions{
-		GovernanceBypass: false,
-		RetainUntilDate:  &t,
-		Mode:             &m,
-	}
-	err = c.PutObjectRetention(context.Background(), bucketName, objectName, opts)
-	if err != nil {
-		logError(testName, function, args, startTime, "", "Error setting retention", err)
-		return
-	}
+	errStrAccessForbidden := `<Error><Code>AccessForbidden</Code><Message>CORSResponse: This CORS request is not allowed. This is usually because the evalution of Origin, request method / Access-Control-Request-Method or Access-Control-Request-Headers are not whitelisted`
+	testCases := []struct {
+		name string
 
-	objectsCh := make(chan minio.ObjectInfo)
-	// Send object names that are needed to be removed to objectsCh
-	go func() {
-		defer close(objectsCh)
-		// List all objects from a bucket-name with a matching prefix.
-		for object := range c.ListObjects(context.Background(), bucketName, minio.ListObjectsOptions{UseV1: true, Recursive: true}) {
-			if object.Err != nil {
+		// Cors rules to apply
+		applyCorsRules []cors.Rule
+
+		// Outbound request info
+		method  string
+		url     string
+		headers map[string]string
+
+		// Wanted response
+		wantStatus       int
+		wantHeaders      map[string]string
+		wantBodyContains string
+	}{
+		{
+			name: "apply bucket rules",
+			applyCorsRules: []cors.Rule{
+				{
+					AllowedOrigin: []string{"https"}, // S3 documents 'https' origin, but it does not actually work, see test below.
+					AllowedMethod: []string{"PUT"},
+					AllowedHeader: []string{"*"},
+				},
+				{
+					AllowedOrigin: []string{"http://www.example1.com"},
+					AllowedMethod: []string{"PUT"},
+					AllowedHeader: []string{"*"},
+					ExposeHeader:  []string{"x-amz-server-side-encryption", "x-amz-request-id"},
+					MaxAgeSeconds: 3600,
+				},
+				{
+					AllowedOrigin: []string{"http://www.example2.com"},
+					AllowedMethod: []string{"POST"},
+					AllowedHeader: []string{"X-My-Special-Header"},
+					ExposeHeader:  []string{"X-AMZ-Request-ID"},
+				},
+				{
+					AllowedOrigin: []string{"http://www.example3.com"},
+					AllowedMethod: []string{"PUT"},
+					AllowedHeader: []string{"X-Example-3-Special-Header"},
+					MaxAgeSeconds: 10,
+				},
+				{
+					AllowedOrigin: []string{"*"},
+					AllowedMethod: []string{"GET"},
+					AllowedHeader: []string{"*"},
+					ExposeHeader:  []string{"x-amz-request-id", "X-AMZ-server-side-encryption"},
+					MaxAgeSeconds: 3600,
+				},
+				{
+					AllowedOrigin: []string{"http://multiplemethodstest.com"},
+					AllowedMethod: []string{"POST", "PUT", "DELETE"},
+					AllowedHeader: []string{"x-abc-*", "x-def-*"},
+				},
+				{
+					AllowedOrigin: []string{"http://UPPERCASEEXAMPLE.com"},
+					AllowedMethod: []string{"DELETE"},
+				},
+				{
+					AllowedOrigin: []string{"https://*"},
+					AllowedMethod: []string{"DELETE"},
+					AllowedHeader: []string{"x-abc-*", "x-def-*"},
+				},
+			},
+		},
+		{
+			name:   "preflight to object url matches example1 rule",
+			method: http.MethodOptions,
+			url:    objectURL,
+			headers: map[string]string{
+				"Origin":                         "http://www.example1.com",
+				"Access-Control-Request-Method":  "PUT",
+				"Access-Control-Request-Headers": "x-another-header,x-could-be-anything",
+			},
+			wantStatus: http.StatusOK,
+			wantHeaders: map[string]string{
+				"Access-Control-Allow-Origin":      "http://www.example1.com",
+				"Access-Control-Allow-Methods":     "PUT",
+				"Access-Control-Allow-Headers":     "x-another-header,x-could-be-anything",
+				"Access-Control-Allow-Credentials": "true",
+				"Access-Control-Max-Age":           "3600",
+				"Content-Length":                   "0",
+				// S3 additionally sets the following headers here, MinIO follows fetch spec and does not:
+				// "Access-Control-Expose-Headers":    "",
+			},
+		},
+		{
+			name:   "preflight to bucket url matches example1 rule",
+			method: http.MethodOptions,
+			url:    bucketURL,
+			headers: map[string]string{
+				"Origin":                         "http://www.example1.com",
+				"Access-Control-Request-Method":  "PUT",
+				"Access-Control-Request-Headers": "x-another-header,x-could-be-anything",
+			},
+			wantStatus: http.StatusOK,
+			wantHeaders: map[string]string{
+				"Access-Control-Allow-Origin":      "http://www.example1.com",
+				"Access-Control-Allow-Methods":     "PUT",
+				"Access-Control-Allow-Headers":     "x-another-header,x-could-be-anything",
+				"Access-Control-Allow-Credentials": "true",
+				"Access-Control-Max-Age":           "3600",
+				"Content-Length":                   "0",
+			},
+		},
+		{
+			name:   "preflight matches example2 rule with header given",
+			method: http.MethodOptions,
+			url:    objectURL,
+			headers: map[string]string{
+				"Origin":                         "http://www.example2.com",
+				"Access-Control-Request-Method":  "POST",
+				"Access-Control-Request-Headers": "X-My-Special-Header",
+			},
+			wantStatus: http.StatusOK,
+			wantHeaders: map[string]string{
+				"Access-Control-Allow-Origin":      "http://www.example2.com",
+				"Access-Control-Allow-Methods":     "POST",
+				"Access-Control-Allow-Headers":     "x-my-special-header",
+				"Access-Control-Allow-Credentials": "true",
+				"Access-Control-Max-Age":           "",
+				"Content-Length":                   "0",
+			},
+		},
+		{
+			name:   "preflight matches example2 rule with no header given",
+			method: http.MethodOptions,
+			url:    objectURL,
+			headers: map[string]string{
+				"Origin":                        "http://www.example2.com",
+				"Access-Control-Request-Method": "POST",
+			},
+			wantStatus: http.StatusOK,
+			wantHeaders: map[string]string{
+				"Access-Control-Allow-Origin":      "http://www.example2.com",
+				"Access-Control-Allow-Methods":     "POST",
+				"Access-Control-Allow-Headers":     "",
+				"Access-Control-Allow-Credentials": "true",
+				"Access-Control-Max-Age":           "",
+				"Content-Length":                   "0",
+			},
+		},
+		{
+			name:   "preflight matches wildcard origin rule",
+			method: http.MethodOptions,
+			url:    objectURL,
+			headers: map[string]string{
+				"Origin":                         "http://www.couldbeanything.com",
+				"Access-Control-Request-Method":  "GET",
+				"Access-Control-Request-Headers": "x-custom-header,x-other-custom-header",
+			},
+			wantStatus: http.StatusOK,
+			wantHeaders: map[string]string{
+				"Access-Control-Allow-Origin":      "*",
+				"Access-Control-Allow-Methods":     "GET",
+				"Access-Control-Allow-Headers":     "x-custom-header,x-other-custom-header",
+				"Access-Control-Allow-Credentials": "",
+				"Access-Control-Max-Age":           "3600",
+				"Content-Length":                   "0",
+			},
+		},
+		{
+			name:   "preflight does not match any rule",
+			method: http.MethodOptions,
+			url:    objectURL,
+			headers: map[string]string{
+				"Origin":                        "http://www.couldbeanything.com",
+				"Access-Control-Request-Method": "DELETE",
+			},
+			wantStatus:       http.StatusForbidden,
+			wantBodyContains: errStrAccessForbidden,
+		},
+		{
+			name:   "preflight does not match example1 rule because of method",
+			method: http.MethodOptions,
+			url:    objectURL,
+			headers: map[string]string{
+				"Origin":                        "http://www.example1.com",
+				"Access-Control-Request-Method": "POST",
+			},
+			wantStatus:       http.StatusForbidden,
+			wantBodyContains: errStrAccessForbidden,
+		},
+		{
+			name:   "s3 processes cors rules even when request is not preflight if cors headers present test get",
+			method: http.MethodGet,
+			url:    objectURL,
+			headers: map[string]string{
+				"Origin":                         "http://www.example1.com",
+				"Access-Control-Request-Headers": "x-another-header,x-could-be-anything",
+				"Access-Control-Request-Method":  "PUT",
+			},
+			wantStatus: http.StatusOK,
+			wantHeaders: map[string]string{
+				"Access-Control-Allow-Credentials": "true",
+				"Access-Control-Allow-Origin":      "http://www.example1.com",
+				"Access-Control-Expose-Headers":    "x-amz-server-side-encryption,x-amz-request-id",
+				// S3 additionally sets the following headers here, MinIO follows fetch spec and does not:
+				// "Access-Control-Allow-Headers":     "x-another-header,x-could-be-anything",
+				// "Access-Control-Allow-Methods":     "PUT",
+				// "Access-Control-Max-Age":           "3600",
+			},
+		},
+		{
+			name:   "s3 processes cors rules even when request is not preflight if cors headers present test put",
+			method: http.MethodPut,
+			url:    objectURL,
+			headers: map[string]string{
+				"Origin":                        "http://www.example1.com",
+				"Access-Control-Request-Method": "GET",
+			},
+			wantStatus: http.StatusOK,
+			wantHeaders: map[string]string{
+				"Access-Control-Allow-Credentials": "",
+				"Access-Control-Allow-Origin":      "*",
+				"Access-Control-Expose-Headers":    "x-amz-request-id,x-amz-server-side-encryption",
+				// S3 additionally sets the following headers here, MinIO follows fetch spec and does not:
+				// "Access-Control-Allow-Headers":     "x-another-header,x-could-be-anything",
+				// "Access-Control-Allow-Methods":     "PUT",
+				// "Access-Control-Max-Age":           "3600",
+			},
+		},
+		{
+			name:   "s3 processes cors rules even when request is not preflight but there is no rule match",
+			method: http.MethodGet,
+			url:    objectURL,
+			headers: map[string]string{
+				"Origin":                         "http://www.example1.com",
+				"Access-Control-Request-Headers": "x-another-header,x-could-be-anything",
+				"Access-Control-Request-Method":  "DELETE",
+			},
+			wantStatus: http.StatusOK,
+			wantHeaders: map[string]string{
+				"Access-Control-Allow-Methods":     "",
+				"Access-Control-Allow-Origin":      "",
+				"Access-Control-Allow-Headers":     "",
+				"Access-Control-Allow-Credentials": "",
+				"Access-Control-Expose-Headers":    "",
+				"Access-Control-Max-Age":           "",
+			},
+		},
+		{
+			name:   "get request matches wildcard origin rule and returns cors headers",
+			method: http.MethodGet,
+			url:    objectURL,
+			headers: map[string]string{
+				"Origin": "http://www.example1.com",
+			},
+			wantStatus: http.StatusOK,
+			wantHeaders: map[string]string{
+				"Access-Control-Allow-Credentials": "",
+				"Access-Control-Allow-Origin":      "*",
+				"Access-Control-Allow-Headers":     "",
+				"Access-Control-Expose-Headers":    "x-amz-request-id,X-AMZ-server-side-encryption",
+				// S3 returns the following headers, MinIO follows fetch spec and does not:
+				// "Access-Control-Max-Age":           "3600",
+				// "Access-Control-Allow-Methods":     "GET",
+			},
+		},
+		{
+			name:   "head request does not match rule and returns no cors headers",
+			method: http.MethodHead,
+			url:    objectURL,
+			headers: map[string]string{
+				"Origin": "http://www.nomatchingdomainfound.com",
+			},
+			wantStatus: http.StatusOK,
+			wantHeaders: map[string]string{
+				"Access-Control-Allow-Credentials": "",
+				"Access-Control-Allow-Methods":     "",
+				"Access-Control-Allow-Origin":      "",
+				"Access-Control-Allow-Headers":     "",
+				"Access-Control-Expose-Headers":    "",
+				"Access-Control-Max-Age":           "",
+			},
+		},
+		{
+			name:   "put request with origin does not match rule and returns no cors headers",
+			method: http.MethodPut,
+			url:    objectURL,
+			headers: map[string]string{
+				"Origin": "http://www.nomatchingdomainfound.com",
+			},
+			wantStatus: http.StatusOK,
+			wantHeaders: map[string]string{
+				"Access-Control-Allow-Credentials": "",
+				"Access-Control-Allow-Methods":     "",
+				"Access-Control-Allow-Origin":      "",
+				"Access-Control-Allow-Headers":     "",
+				"Access-Control-Expose-Headers":    "",
+				"Access-Control-Max-Age":           "",
+			},
+		},
+		{
+			name:       "put request with no origin does not match rule and returns no cors headers",
+			method:     http.MethodPut,
+			url:        objectURL,
+			headers:    map[string]string{},
+			wantStatus: http.StatusOK,
+			wantHeaders: map[string]string{
+				"Access-Control-Allow-Credentials": "",
+				"Access-Control-Allow-Methods":     "",
+				"Access-Control-Allow-Origin":      "",
+				"Access-Control-Allow-Headers":     "",
+				"Access-Control-Expose-Headers":    "",
+				"Access-Control-Max-Age":           "",
+			},
+		},
+		{
+			name:   "preflight for delete request with wildcard origin does not match",
+			method: http.MethodOptions,
+			url:    objectURL,
+			headers: map[string]string{
+				"Origin":                        "http://www.notsecureexample.com",
+				"Access-Control-Request-Method": "DELETE",
+			},
+			wantStatus:       http.StatusForbidden,
+			wantBodyContains: errStrAccessForbidden,
+		},
+		{
+			name:   "preflight for delete request with wildcard https origin matches secureexample",
+			method: http.MethodOptions,
+			url:    objectURL,
+			headers: map[string]string{
+				"Origin":                        "https://www.secureexample.com",
+				"Access-Control-Request-Method": "DELETE",
+			},
+			wantStatus: http.StatusOK,
+			wantHeaders: map[string]string{
+				"Access-Control-Allow-Credentials": "true",
+				"Access-Control-Allow-Methods":     "DELETE",
+				"Access-Control-Allow-Origin":      "https://www.secureexample.com",
+				"Access-Control-Allow-Headers":     "",
+				"Access-Control-Expose-Headers":    "",
+				"Access-Control-Max-Age":           "",
+			},
+		},
+		{
+			name:   "preflight for delete request matches secureexample with wildcard https origin and request headers",
+			method: http.MethodOptions,
+			url:    objectURL,
+			headers: map[string]string{
+				"Origin":                         "https://www.secureexample.com",
+				"Access-Control-Request-Method":  "DELETE",
+				"Access-Control-Request-Headers": "x-abc-1,x-abc-second,x-def-1",
+			},
+			wantStatus: http.StatusOK,
+			wantHeaders: map[string]string{
+				"Access-Control-Allow-Credentials": "true",
+				"Access-Control-Allow-Methods":     "DELETE",
+				"Access-Control-Allow-Origin":      "https://www.secureexample.com",
+				"Access-Control-Allow-Headers":     "x-abc-1,x-abc-second,x-def-1",
+				"Access-Control-Expose-Headers":    "",
+				"Access-Control-Max-Age":           "",
+			},
+		},
+		{
+			name:   "preflight for delete request matches secureexample rejected because request header does not match",
+			method: http.MethodOptions,
+			url:    objectURL,
+			headers: map[string]string{
+				"Origin":                         "https://www.secureexample.com",
+				"Access-Control-Request-Method":  "DELETE",
+				"Access-Control-Request-Headers": "x-abc-1,x-abc-second,x-def-1,x-does-not-match",
+			},
+			wantStatus:       http.StatusForbidden,
+			wantBodyContains: errStrAccessForbidden,
+		},
+		{
+			name:   "preflight with https origin is documented by s3 as matching but it does not match",
+			method: http.MethodOptions,
+			url:    objectURL,
+			headers: map[string]string{
+				"Origin":                        "https://www.securebutdoesnotmatch.com",
+				"Access-Control-Request-Method": "PUT",
+			},
+			wantStatus:       http.StatusForbidden,
+			wantBodyContains: errStrAccessForbidden,
+		},
+		{
+			name:       "put no origin no match returns no cors headers",
+			method:     http.MethodPut,
+			url:        objectURL,
+			headers:    map[string]string{},
+			wantStatus: http.StatusOK,
+
+			wantHeaders: map[string]string{
+				"Access-Control-Allow-Credentials": "",
+				"Access-Control-Allow-Methods":     "",
+				"Access-Control-Allow-Origin":      "",
+				"Access-Control-Allow-Headers":     "",
+				"Access-Control-Expose-Headers":    "",
+				"Access-Control-Max-Age":           "",
+			},
+		},
+		{
+			name:   "put with origin match example1 returns cors headers",
+			method: http.MethodPut,
+			url:    objectURL,
+			headers: map[string]string{
+				"Origin": "http://www.example1.com",
+			},
+			wantStatus: http.StatusOK,
+
+			wantHeaders: map[string]string{
+				"Access-Control-Allow-Credentials": "true",
+				"Access-Control-Allow-Origin":      "http://www.example1.com",
+				"Access-Control-Allow-Headers":     "",
+				"Access-Control-Expose-Headers":    "x-amz-server-side-encryption,x-amz-request-id",
+				// S3 returns the following headers, MinIO follows fetch spec and does not:
+				// "Access-Control-Max-Age":           "3600",
+				// "Access-Control-Allow-Methods":     "PUT",
+			},
+		},
+		{
+			name:   "put with origin and header match example1 returns cors headers",
+			method: http.MethodPut,
+			url:    objectURL,
+			headers: map[string]string{
+				"Origin":              "http://www.example1.com",
+				"x-could-be-anything": "myvalue",
+			},
+			wantStatus: http.StatusOK,
+
+			wantHeaders: map[string]string{
+				"Access-Control-Allow-Credentials": "true",
+				"Access-Control-Allow-Origin":      "http://www.example1.com",
+				"Access-Control-Allow-Headers":     "",
+				"Access-Control-Expose-Headers":    "x-amz-server-side-encryption,x-amz-request-id",
+				// S3 returns the following headers, MinIO follows fetch spec and does not:
+				// "Access-Control-Max-Age":           "3600",
+				// "Access-Control-Allow-Methods":     "PUT",
+			},
+		},
+		{
+			name:   "put no match found returns no cors headers",
+			method: http.MethodPut,
+			url:    objectURL,
+			headers: map[string]string{
+				"Origin": "http://www.unmatchingdomain.com",
+			},
+			wantStatus: http.StatusOK,
+
+			wantHeaders: map[string]string{
+				"Access-Control-Allow-Credentials": "",
+				"Access-Control-Allow-Methods":     "",
+				"Access-Control-Allow-Origin":      "",
+				"Access-Control-Allow-Headers":     "",
+				"Access-Control-Expose-Headers":    "",
+				"Access-Control-Max-Age":           "",
+			},
+		},
+		{
+			name:   "put with origin match example3 returns cors headers",
+			method: http.MethodPut,
+			url:    objectURL,
+			headers: map[string]string{
+				"Origin":              "http://www.example3.com",
+				"X-My-Special-Header": "myvalue",
+			},
+			wantStatus: http.StatusOK,
+
+			wantHeaders: map[string]string{
+				"Access-Control-Allow-Credentials": "true",
+				"Access-Control-Allow-Origin":      "http://www.example3.com",
+				"Access-Control-Allow-Headers":     "",
+				"Access-Control-Expose-Headers":    "",
+				// S3 returns the following headers, MinIO follows fetch spec and does not:
+				// "Access-Control-Max-Age":           "10",
+				// "Access-Control-Allow-Methods":     "PUT",
+			},
+		},
+		{
+			name:   "preflight matches example1 rule headers case is incorrect",
+			method: http.MethodOptions,
+			url:    objectURL,
+			headers: map[string]string{
+				"Origin":                        "http://www.example1.com",
+				"Access-Control-Request-Method": "PUT",
+				// Fetch standard guarantees that these are sent lowercase, here we test what happens when they are not.
+				"Access-Control-Request-Headers": "X-Another-Header,X-Could-Be-Anything",
+			},
+			wantStatus: http.StatusOK,
+			wantHeaders: map[string]string{
+				"Access-Control-Allow-Origin":      "http://www.example1.com",
+				"Access-Control-Allow-Methods":     "PUT",
+				"Access-Control-Allow-Headers":     "x-another-header,x-could-be-anything",
+				"Access-Control-Allow-Credentials": "true",
+				"Access-Control-Max-Age":           "3600",
+				"Content-Length":                   "0",
+				// S3 returns the following headers, MinIO follows fetch spec and does not:
+				// "Access-Control-Expose-Headers":    "x-amz-server-side-encryption,x-amz-request-id",
+			},
+		},
+		{
+			name:   "preflight matches example1 rule headers are not sorted",
+			method: http.MethodOptions,
+			url:    objectURL,
+			headers: map[string]string{
+				"Origin":                        "http://www.example1.com",
+				"Access-Control-Request-Method": "PUT",
+				// Fetch standard guarantees that these are sorted, test what happens when they are not.
+				"Access-Control-Request-Headers": "a-customer-header,b-should-be-last",
+			},
+			wantStatus: http.StatusOK,
+			wantHeaders: map[string]string{
+				"Access-Control-Allow-Origin":      "http://www.example1.com",
+				"Access-Control-Allow-Methods":     "PUT",
+				"Access-Control-Allow-Headers":     "a-customer-header,b-should-be-last",
+				"Access-Control-Allow-Credentials": "true",
+				"Access-Control-Max-Age":           "3600",
+				"Content-Length":                   "0",
+				// S3 returns the following headers, MinIO follows fetch spec and does not:
+				// "Access-Control-Expose-Headers":    "x-amz-server-side-encryption,x-amz-request-id",
+			},
+		},
+		{
+			name:   "preflight with case sensitivity in origin matches uppercase",
+			method: http.MethodOptions,
+			url:    objectURL,
+			headers: map[string]string{
+				"Origin":                        "http://UPPERCASEEXAMPLE.com",
+				"Access-Control-Request-Method": "DELETE",
+			},
+			wantStatus: http.StatusOK,
+			wantHeaders: map[string]string{
+				"Access-Control-Allow-Credentials": "true",
+				"Access-Control-Allow-Methods":     "DELETE",
+				"Access-Control-Allow-Origin":      "http://UPPERCASEEXAMPLE.com",
+				"Access-Control-Allow-Headers":     "",
+				"Access-Control-Expose-Headers":    "",
+				"Access-Control-Max-Age":           "",
+			},
+		},
+		{
+			name:   "preflight with case sensitivity in origin does not match when lowercase",
+			method: http.MethodOptions,
+			url:    objectURL,
+			headers: map[string]string{
+				"Origin":                        "http://uppercaseexample.com",
+				"Access-Control-Request-Method": "DELETE",
+			},
+			wantStatus:       http.StatusForbidden,
+			wantBodyContains: errStrAccessForbidden,
+		},
+		{
+			name:   "preflight match upper case with unknown header but no header restrictions",
+			method: http.MethodOptions,
+			url:    objectURL,
+			headers: map[string]string{
+				"Origin":                         "http://UPPERCASEEXAMPLE.com",
+				"Access-Control-Request-Method":  "DELETE",
+				"Access-Control-Request-Headers": "x-unknown-1",
+			},
+			wantStatus:       http.StatusForbidden,
+			wantBodyContains: errStrAccessForbidden,
+		},
+		{
+			name:   "preflight for delete request matches multiplemethodstest.com origin and request headers",
+			method: http.MethodOptions,
+			url:    objectURL,
+			headers: map[string]string{
+				"Origin":                         "http://multiplemethodstest.com",
+				"Access-Control-Request-Method":  "DELETE",
+				"Access-Control-Request-Headers": "x-abc-1",
+			},
+			wantStatus: http.StatusOK,
+			wantHeaders: map[string]string{
+				"Access-Control-Allow-Credentials": "true",
+				"Access-Control-Allow-Origin":      "http://multiplemethodstest.com",
+				"Access-Control-Allow-Headers":     "x-abc-1",
+				"Access-Control-Expose-Headers":    "",
+				"Access-Control-Max-Age":           "",
+				// S3 returns POST, PUT, DELETE here, MinIO does not as spec does not require it.
+				// "Access-Control-Allow-Methods":     "DELETE",
+			},
+		},
+		{
+			name:   "delete request goes ahead because cors is only for browsers and does not block on the server side",
+			method: http.MethodDelete,
+			url:    objectURL,
+			headers: map[string]string{
+				"Origin": "http://www.justrandom.com",
+			},
+			wantStatus: http.StatusNoContent,
+		},
+	}
+
+	for i, test := range testCases {
+		testName := fmt.Sprintf("%s_%d_%s", testName, i+1, strings.ReplaceAll(test.name, " ", "_"))
+
+		// Apply the CORS rules
+		if test.applyCorsRules != nil {
+			corsConfig := &cors.Config{
+				CORSRules: test.applyCorsRules,
+			}
+			err = c.SetBucketCors(ctx, bucketName, corsConfig)
+			if err != nil {
+				logError(testName, function, args, startTime, "", "SetBucketCors failed to apply", err)
+				return
+			}
+		}
+
+		// Make request
+		if test.method != "" && test.url != "" {
+			req, err := http.NewRequestWithContext(ctx, test.method, test.url, nil)
+			if err != nil {
+				logError(testName, function, args, startTime, "", "HTTP request creation failed", err)
+				return
+			}
+			req.Header.Set("User-Agent", "MinIO-go-FunctionalTest/"+appVersion)
+
+			for k, v := range test.headers {
+				req.Header.Set(k, v)
+			}
+			resp, err := httpClient.Do(req)
+			if err != nil {
+				logError(testName, function, args, startTime, "", "HTTP request failed", err)
+				return
+			}
+			defer resp.Body.Close()
+
+			// Check returned status code
+			if resp.StatusCode != test.wantStatus {
+				errStr := fmt.Sprintf(" incorrect status code in response, want: %d, got: %d", test.wantStatus, resp.StatusCode)
+				logError(testName, function, args, startTime, "", errStr, nil)
+				return
+			}
+
+			// Check returned body
+			if test.wantBodyContains != "" {
+				body, err := io.ReadAll(resp.Body)
+				if err != nil {
+					logError(testName, function, args, startTime, "", "Failed to read response body", err)
+					return
+				}
+				if !strings.Contains(string(body), test.wantBodyContains) {
+					errStr := fmt.Sprintf(" incorrect body in response, want: %s, in got: %s", test.wantBodyContains, string(body))
+					logError(testName, function, args, startTime, "", errStr, nil)
+					return
+				}
+			}
+
+			// Check returned response headers
+			for k, v := range test.wantHeaders {
+				gotVal := resp.Header.Get(k)
+				if k == "Access-Control-Expose-Headers" {
+					// MinIO returns this in canonical form, S3 does not.
+					gotVal = strings.ToLower(gotVal)
+					v = strings.ToLower(v)
+				}
+				// Remove all spaces, S3 adds spaces after CSV values in headers, MinIO does not.
+				gotVal = strings.ReplaceAll(gotVal, " ", "")
+				if gotVal != v {
+					errStr := fmt.Sprintf(" incorrect header in response, want: %s: '%s', got: '%s'", k, v, gotVal)
+					logError(testName, function, args, startTime, "", errStr, nil)
+					return
+				}
+			}
+		}
+		logSuccess(testName, function, args, startTime)
+	}
+	logSuccess(testName, function, args, startTime)
+}
+
+func testCorsSetGetDelete() {
+	ctx := context.Background()
+	startTime := time.Now()
+	testName := getFuncName()
+	function := "SetBucketCors(bucketName, cors)"
+	args := map[string]interface{}{
+		"bucketName": "",
+		"cors":       "",
+	}
+
+	c, err := NewClient(ClientConfig{})
+	if err != nil {
+		logError(testName, function, args, startTime, "", "MinIO client object creation failed", err)
+		return
+	}
+
+	// Generate a random bucket name with the "minio-go-test-" prefix and record it in the logged args.
+	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
+	args["bucketName"] = bucketName
+
+	// Create the bucket in us-east-1; cleanupBucket is deferred only once creation has succeeded.
+	err = c.MakeBucket(ctx, bucketName, minio.MakeBucketOptions{Region: "us-east-1"})
+	if err != nil {
+		logError(testName, function, args, startTime, "", "MakeBucket failed", err)
+		return
+	}
+	defer cleanupBucket(bucketName, c)
+
+	// Build three CORS rules (two origin-specific ones and a wildcard-origin GET rule) and apply them to the bucket.
+	corsRules := []cors.Rule{
+		{
+			AllowedOrigin: []string{"http://www.example1.com"},
+			AllowedMethod: []string{"PUT"},
+			AllowedHeader: []string{"*"},
+		},
+		{
+			AllowedOrigin: []string{"http://www.example2.com"},
+			AllowedMethod: []string{"POST"},
+			AllowedHeader: []string{"X-My-Special-Header"},
+		},
+		{
+			AllowedOrigin: []string{"*"},
+			AllowedMethod: []string{"GET"},
+			AllowedHeader: []string{"*"},
+		},
+	}
+	corsConfig := cors.NewConfig(corsRules)
+	err = c.SetBucketCors(ctx, bucketName, corsConfig)
+	if err != nil {
+		logError(testName, function, args, startTime, "", "SetBucketCors failed to apply", err)
+		return
+	}
+
+	// Read the configuration back; it must be deeply equal to the configuration that was applied.
+	gotCorsConfig, err := c.GetBucketCors(ctx, bucketName)
+	if err != nil {
+		logError(testName, function, args, startTime, "", "GetBucketCors failed", err)
+		return
+	}
+	if !reflect.DeepEqual(corsConfig, gotCorsConfig) {
+		msg := fmt.Sprintf("GetBucketCors returned unexpected rules, expected: %+v, got: %+v", corsConfig, gotCorsConfig)
+		logError(testName, function, args, startTime, "", msg, nil)
+		return
+	}
+
+	// Delete the rules by calling SetBucketCors with a nil configuration.
+	err = c.SetBucketCors(ctx, bucketName, nil)
+	if err != nil {
+		logError(testName, function, args, startTime, "", "SetBucketCors failed to delete", err)
+		return
+	}
+
+	// Read the configuration again; after deletion a nil configuration with no error is expected.
+	gotCorsConfig, err = c.GetBucketCors(ctx, bucketName)
+	if err != nil {
+		logError(testName, function, args, startTime, "", "GetBucketCors failed", err)
+		return
+	}
+	if gotCorsConfig != nil {
+		logError(testName, function, args, startTime, "", "GetBucketCors returned unexpected rules", nil)
+		return
+	}
+
+	logSuccess(testName, function, args, startTime)
+}
+
+// Test deleting multiple objects with object retention set in Governance mode
+func testRemoveObjects() {
+	// initialize logging params
+	startTime := time.Now()
+	testName := getFuncName()
+	function := "RemoveObjects(bucketName, objectsCh, opts)"
+	args := map[string]interface{}{
+		"bucketName":   "",
+		"objectPrefix": "",
+		"recursive":    "true",
+	}
+
+	c, err := NewClient(ClientConfig{})
+	if err != nil {
+		logError(testName, function, args, startTime, "", "MinIO client v4 object creation failed", err)
+		return
+	}
+
+	// Generate a new random bucket name.
+	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
+	args["bucketName"] = bucketName
+	objectName := randString(60, rand.NewSource(time.Now().UnixNano()), "")
+	args["objectName"] = objectName
+
+	// Make a new bucket.
+	err = c.MakeBucket(context.Background(), bucketName, minio.MakeBucketOptions{Region: "us-east-1", ObjectLocking: true})
+	if err != nil {
+		logError(testName, function, args, startTime, "", "MakeBucket failed", err)
+		return
+	}
+
+	bufSize := dataFileMap["datafile-129-MB"]
+	reader := getDataReader("datafile-129-MB")
+	defer reader.Close()
+
+	_, err = c.PutObject(context.Background(), bucketName, objectName, reader, int64(bufSize), minio.PutObjectOptions{})
+	if err != nil {
+		logError(testName, function, args, startTime, "", "Error uploading object", err)
+		return
+	}
+
+	// Replace with smaller...
+	bufSize = dataFileMap["datafile-10-kB"]
+	reader = getDataReader("datafile-10-kB")
+	defer reader.Close()
+
+	_, err = c.PutObject(context.Background(), bucketName, objectName, reader, int64(bufSize), minio.PutObjectOptions{})
+	if err != nil {
+		logError(testName, function, args, startTime, "", "Error uploading object", err)
+	}
+
+	t := time.Date(2030, time.April, 25, 14, 0, 0, 0, time.UTC)
+	m := minio.RetentionMode(minio.Governance)
+	opts := minio.PutObjectRetentionOptions{
+		GovernanceBypass: false,
+		RetainUntilDate:  &t,
+		Mode:             &m,
+	}
+	err = c.PutObjectRetention(context.Background(), bucketName, objectName, opts)
+	if err != nil {
+		logError(testName, function, args, startTime, "", "Error setting retention", err)
+		return
+	}
+
+	objectsCh := make(chan minio.ObjectInfo)
+	// Send object names that are needed to be removed to objectsCh
+	go func() {
+		defer close(objectsCh)
+		// List all objects from a bucket-name with a matching prefix.
+		for object := range c.ListObjects(context.Background(), bucketName, minio.ListObjectsOptions{UseV1: true, Recursive: true}) {
+			if object.Err != nil {
 				logError(testName, function, args, startTime, "", "Error listing objects", object.Err)
 				return
 			}
@@ -13503,6 +13557,203 @@ func testRemoveObjects() {
 	logSuccess(testName, function, args, startTime)
 }
 
+// testGetBucketTagging checks that GetBucketTagging on a newly created bucket fails with the NoSuchTagSet error code.
+func testGetBucketTagging() {
+	// Logging parameters passed to every logError/logSuccess call below.
+	startTime := time.Now()
+	testName := getFuncName()
+	function := "GetBucketTagging(bucketName)"
+	args := map[string]interface{}{
+		"bucketName": "",
+	}
+
+	c, err := NewClient(ClientConfig{})
+	if err != nil {
+		logError(testName, function, args, startTime, "", "MinIO client v4 object creation failed", err)
+		return
+	}
+
+	// Generate a random bucket name with the "minio-go-test-" prefix and record it in the logged args.
+	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
+	args["bucketName"] = bucketName
+
+	// Create the bucket in us-east-1 with object locking enabled.
+	err = c.MakeBucket(context.Background(), bucketName, minio.MakeBucketOptions{Region: "us-east-1", ObjectLocking: true})
+	if err != nil {
+		logError(testName, function, args, startTime, "", "MakeBucket failed", err)
+		return
+	}
+
+	_, err = c.GetBucketTagging(context.Background(), bucketName)
+	if minio.ToErrorResponse(err).Code != "NoSuchTagSet" {
+		logError(testName, function, args, startTime, "", "Invalid error from server failed", err)
+		return
+	}
+
+	if err = cleanupVersionedBucket(bucketName, c); err != nil {
+		logError(testName, function, args, startTime, "", "CleanupBucket failed", err)
+		return
+	}
+
+	logSuccess(testName, function, args, startTime)
+}
+
+// testSetBucketTagging sets one random tag on a fresh bucket and verifies that GetBucketTagging returns it.
+func testSetBucketTagging() {
+	// Logging parameters passed to every logError/logSuccess call below.
+	startTime := time.Now()
+	testName := getFuncName()
+	function := "SetBucketTagging(bucketName, tags)"
+	args := map[string]interface{}{
+		"bucketName": "",
+		"tags":       "",
+	}
+
+	c, err := NewClient(ClientConfig{})
+	if err != nil {
+		logError(testName, function, args, startTime, "", "MinIO client v4 object creation failed", err)
+		return
+	}
+
+	// Generate a random bucket name with the "minio-go-test-" prefix and record it in the logged args.
+	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
+	args["bucketName"] = bucketName
+
+	// Create the bucket in us-east-1 with object locking enabled.
+	err = c.MakeBucket(context.Background(), bucketName, minio.MakeBucketOptions{Region: "us-east-1", ObjectLocking: true})
+	if err != nil {
+		logError(testName, function, args, startTime, "", "MakeBucket failed", err)
+		return
+	}
+
+	_, err = c.GetBucketTagging(context.Background(), bucketName)
+	if minio.ToErrorResponse(err).Code != "NoSuchTagSet" {
+		logError(testName, function, args, startTime, "", "Invalid error from server", err)
+		return
+	}
+
+	tag := randString(60, rand.NewSource(time.Now().UnixNano()), "")
+	expectedValue := randString(60, rand.NewSource(time.Now().UnixNano()), "")
+
+	t, err := tags.MapToBucketTags(map[string]string{
+		tag: expectedValue,
+	})
+	if err != nil {
+		logError(testName, function, args, startTime, "", "tags.MapToBucketTags failed", err)
+		return
+	}
+	args["tags"] = t.String() // t is only dereferenced after the error check above has passed
+
+	err = c.SetBucketTagging(context.Background(), bucketName, t)
+	if err != nil {
+		logError(testName, function, args, startTime, "", "SetBucketTagging failed", err)
+		return
+	}
+
+	tagging, err := c.GetBucketTagging(context.Background(), bucketName)
+	if err != nil {
+		logError(testName, function, args, startTime, "", "GetBucketTagging failed", err)
+		return
+	}
+
+	if tagging.ToMap()[tag] != expectedValue {
+		msg := fmt.Sprintf("Tag %s; got value %s; wanted %s", tag, tagging.ToMap()[tag], expectedValue)
+		logError(testName, function, args, startTime, "", msg, err)
+		return
+	}
+
+	// Remove the test bucket via cleanupVersionedBucket; a cleanup failure is reported as a test failure.
+	if err = cleanupVersionedBucket(bucketName, c); err != nil {
+		logError(testName, function, args, startTime, "", "CleanupBucket failed", err)
+		return
+	}
+
+	logSuccess(testName, function, args, startTime)
+}
+
+// testRemoveBucketTagging sets a tag on a fresh bucket, removes the tagging, and verifies GetBucketTagging reports NoSuchTagSet again.
+func testRemoveBucketTagging() {
+	// Logging parameters passed to every logError/logSuccess call below.
+	startTime := time.Now()
+	testName := getFuncName()
+	function := "RemoveBucketTagging(bucketName)"
+	args := map[string]interface{}{
+		"bucketName": "",
+	}
+
+	c, err := NewClient(ClientConfig{})
+	if err != nil {
+		logError(testName, function, args, startTime, "", "MinIO client v4 object creation failed", err)
+		return
+	}
+
+	// Generate a random bucket name with the "minio-go-test-" prefix and record it in the logged args.
+	bucketName := randString(60, rand.NewSource(time.Now().UnixNano()), "minio-go-test-")
+	args["bucketName"] = bucketName
+
+	// Create the bucket in us-east-1 with object locking enabled.
+	err = c.MakeBucket(context.Background(), bucketName, minio.MakeBucketOptions{Region: "us-east-1", ObjectLocking: true})
+	if err != nil {
+		logError(testName, function, args, startTime, "", "MakeBucket failed", err)
+		return
+	}
+
+	_, err = c.GetBucketTagging(context.Background(), bucketName)
+	if minio.ToErrorResponse(err).Code != "NoSuchTagSet" {
+		logError(testName, function, args, startTime, "", "Invalid error from server", err)
+		return
+	}
+
+	tag := randString(60, rand.NewSource(time.Now().UnixNano()), "")
+	expectedValue := randString(60, rand.NewSource(time.Now().UnixNano()), "")
+
+	t, err := tags.MapToBucketTags(map[string]string{
+		tag: expectedValue,
+	})
+	if err != nil {
+		logError(testName, function, args, startTime, "", "tags.MapToBucketTags failed", err)
+		return
+	}
+
+	err = c.SetBucketTagging(context.Background(), bucketName, t)
+	if err != nil {
+		logError(testName, function, args, startTime, "", "SetBucketTagging failed", err)
+		return
+	}
+
+	tagging, err := c.GetBucketTagging(context.Background(), bucketName)
+	if err != nil {
+		logError(testName, function, args, startTime, "", "GetBucketTagging failed", err)
+		return
+	}
+
+	if tagging.ToMap()[tag] != expectedValue {
+		msg := fmt.Sprintf("Tag %s; got value %s; wanted %s", tag, tagging.ToMap()[tag], expectedValue)
+		logError(testName, function, args, startTime, "", msg, err)
+		return
+	}
+
+	err = c.RemoveBucketTagging(context.Background(), bucketName)
+	if err != nil {
+		logError(testName, function, args, startTime, "", "RemoveBucketTagging failed", err)
+		return
+	}
+
+	_, err = c.GetBucketTagging(context.Background(), bucketName)
+	if minio.ToErrorResponse(err).Code != "NoSuchTagSet" {
+		logError(testName, function, args, startTime, "", "Invalid error from server", err)
+		return
+	}
+
+	// Remove the test bucket via cleanupVersionedBucket; a cleanup failure is reported as a test failure.
+	if err = cleanupVersionedBucket(bucketName, c); err != nil {
+		logError(testName, function, args, startTime, "", "CleanupBucket failed", err)
+		return
+	}
+
+	logSuccess(testName, function, args, startTime)
+}
+
 // Convert string to bool and always return false if any error
 func mustParseBool(str string) bool {
 	b, err := strconv.ParseBool(str)
@@ -13536,6 +13787,9 @@ func main() {
 
 	// execute tests
 	if isFullMode() {
+		testCorsSetGetDelete()
+		testCors()
+		testListMultipartUpload()
 		testGetObjectAttributes()
 		testGetObjectAttributesErrorCases()
 		testMakeBucketErrorV2()
@@ -13551,7 +13805,9 @@ func main() {
 		testCompose10KSourcesV2()
 		testUserMetadataCopyingV2()
 		testPutObjectWithChecksums()
-		testPutMultipartObjectWithChecksums()
+		testPutObjectWithTrailingChecksums()
+		testPutMultipartObjectWithChecksums(false)
+		testPutMultipartObjectWithChecksums(true)
 		testPutObject0ByteV2()
 		testPutObjectNoLengthV2()
 		testPutObjectsUnknownV2()
@@ -13576,6 +13832,7 @@ func main() {
 		testGetObjectReadAtFunctional()
 		testGetObjectReadAtWhenEOFWasReached()
 		testPresignedPostPolicy()
+		testPresignedPostPolicyWrongFile()
 		testCopyObject()
 		testComposeObjectErrorCases()
 		testCompose10KSources()
@@ -13606,6 +13863,9 @@ func main() {
 		testObjectTaggingWithVersioning()
 		testTrailingChecksums()
 		testPutObjectWithAutomaticChecksums()
+		testGetBucketTagging()
+		testSetBucketTagging()
+		testRemoveBucketTagging()
 
 		// SSE-C tests will only work over TLS connection.
 		if tls {
diff --git a/vendor/github.com/minio/minio-go/v7/pkg/cors/cors.go b/vendor/github.com/minio/minio-go/v7/pkg/cors/cors.go
new file mode 100644
index 00000000..e71864ee
--- /dev/null
+++ b/vendor/github.com/minio/minio-go/v7/pkg/cors/cors.go
@@ -0,0 +1,91 @@
+/*
+ * MinIO Go Library for Amazon S3 Compatible Cloud Storage
+ * Copyright 2015-2024 MinIO, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package cors
+
+import (
+	"encoding/xml"
+	"fmt"
+	"io"
+	"strings"
+
+	"github.com/dustin/go-humanize"
+)
+
+const defaultXMLNS = "http://s3.amazonaws.com/doc/2006-03-01/"
+
+// Config is the container for a CORS configuration for a bucket.
+type Config struct {
+	XMLNS     string   `xml:"xmlns,attr,omitempty"`
+	XMLName   xml.Name `xml:"CORSConfiguration"`
+	CORSRules []Rule   `xml:"CORSRule"`
+}
+
+// Rule is a single rule in a CORS configuration.
+type Rule struct {
+	AllowedHeader []string `xml:"AllowedHeader,omitempty"`
+	AllowedMethod []string `xml:"AllowedMethod,omitempty"`
+	AllowedOrigin []string `xml:"AllowedOrigin,omitempty"`
+	ExposeHeader  []string `xml:"ExposeHeader,omitempty"`
+	ID            string   `xml:"ID,omitempty"`
+	MaxAgeSeconds int      `xml:"MaxAgeSeconds,omitempty"`
+}
+
+// NewConfig creates a new CORS configuration with the given rules.
+func NewConfig(rules []Rule) *Config {
+	return &Config{
+		XMLNS: defaultXMLNS,
+		XMLName: xml.Name{
+			Local: "CORSConfiguration",
+			Space: defaultXMLNS,
+		},
+		CORSRules: rules,
+	}
+}
+
+// ParseBucketCorsConfig parses a CORS configuration in XML from an io.Reader.
+func ParseBucketCorsConfig(reader io.Reader) (*Config, error) {
+	var c Config
+
+	// Max size of cors document is 64KiB according to https://docs.aws.amazon.com/AmazonS3/latest/API/API_PutBucketCors.html
+	// This limiter is just for safety so has a max of 128KiB
+	err := xml.NewDecoder(io.LimitReader(reader, 128*humanize.KiByte)).Decode(&c)
+	if err != nil {
+		return nil, fmt.Errorf("decoding xml: %w", err)
+	}
+	if c.XMLNS == "" {
+		c.XMLNS = defaultXMLNS
+	}
+	for i, rule := range c.CORSRules {
+		for j, method := range rule.AllowedMethod {
+			c.CORSRules[i].AllowedMethod[j] = strings.ToUpper(method)
+		}
+	}
+	return &c, nil
+}
+
+// ToXML marshals the CORS configuration to XML.
+func (c Config) ToXML() ([]byte, error) {
+	if c.XMLNS == "" {
+		c.XMLNS = defaultXMLNS
+	}
+	data, err := xml.Marshal(&c)
+	if err != nil {
+		return nil, fmt.Errorf("marshaling xml: %w", err)
+	}
+	return append([]byte(xml.Header), data...), nil
+}
diff --git a/vendor/github.com/minio/minio-go/v7/pkg/credentials/file_aws_credentials.go b/vendor/github.com/minio/minio-go/v7/pkg/credentials/file_aws_credentials.go
index 8c5c4eb2..541e1a72 100644
--- a/vendor/github.com/minio/minio-go/v7/pkg/credentials/file_aws_credentials.go
+++ b/vendor/github.com/minio/minio-go/v7/pkg/credentials/file_aws_credentials.go
@@ -26,7 +26,7 @@ import (
 	"strings"
 	"time"
 
-	ini "gopkg.in/ini.v1"
+	"github.com/go-ini/ini"
 )
 
 // A externalProcessCredentials stores the output of a credential_process
diff --git a/vendor/github.com/minio/minio-go/v7/pkg/credentials/file_minio_client.go b/vendor/github.com/minio/minio-go/v7/pkg/credentials/file_minio_client.go
index eb777675..750e26ff 100644
--- a/vendor/github.com/minio/minio-go/v7/pkg/credentials/file_minio_client.go
+++ b/vendor/github.com/minio/minio-go/v7/pkg/credentials/file_minio_client.go
@@ -22,7 +22,7 @@ import (
 	"path/filepath"
 	"runtime"
 
-	jsoniter "github.com/json-iterator/go"
+	"github.com/goccy/go-json"
 )
 
 // A FileMinioClient retrieves credentials from the current user's home
@@ -39,7 +39,7 @@ type FileMinioClient struct {
 	Filename string
 
 	// MinIO Alias to extract credentials from the shared credentials file. If empty
-	// will default to environment variable "MINIO_ALIAS" or "default" if
+	// will default to environment variable "MINIO_ALIAS" or "s3" if
 	// environment variable is also not set.
 	Alias string
 
@@ -121,8 +121,6 @@ type config struct {
 // returned if it fails to read from the file.
 func loadAlias(filename, alias string) (hostConfig, error) {
 	cfg := &config{}
-	json := jsoniter.ConfigCompatibleWithStandardLibrary
-
 	configBytes, err := os.ReadFile(filename)
 	if err != nil {
 		return hostConfig{}, err
diff --git a/vendor/github.com/minio/minio-go/v7/pkg/credentials/iam_aws.go b/vendor/github.com/minio/minio-go/v7/pkg/credentials/iam_aws.go
index 7322948e..ea4b3ef9 100644
--- a/vendor/github.com/minio/minio-go/v7/pkg/credentials/iam_aws.go
+++ b/vendor/github.com/minio/minio-go/v7/pkg/credentials/iam_aws.go
@@ -31,7 +31,7 @@ import (
 	"strings"
 	"time"
 
-	jsoniter "github.com/json-iterator/go"
+	"github.com/goccy/go-json"
 )
 
 // DefaultExpiryWindow - Default expiry window.
@@ -308,7 +308,7 @@ func getEcsTaskCredentials(client *http.Client, endpoint, token string) (ec2Role
 	}
 
 	respCreds := ec2RoleCredRespBody{}
-	if err := jsoniter.NewDecoder(resp.Body).Decode(&respCreds); err != nil {
+	if err := json.NewDecoder(resp.Body).Decode(&respCreds); err != nil {
 		return ec2RoleCredRespBody{}, err
 	}
 
@@ -418,7 +418,7 @@ func getCredentials(client *http.Client, endpoint string) (ec2RoleCredRespBody,
 	}
 
 	respCreds := ec2RoleCredRespBody{}
-	if err := jsoniter.NewDecoder(resp.Body).Decode(&respCreds); err != nil {
+	if err := json.NewDecoder(resp.Body).Decode(&respCreds); err != nil {
 		return ec2RoleCredRespBody{}, err
 	}
 
diff --git a/vendor/github.com/minio/minio-go/v7/pkg/credentials/sts_web_identity.go b/vendor/github.com/minio/minio-go/v7/pkg/credentials/sts_web_identity.go
index 596d9515..8c06bac6 100644
--- a/vendor/github.com/minio/minio-go/v7/pkg/credentials/sts_web_identity.go
+++ b/vendor/github.com/minio/minio-go/v7/pkg/credentials/sts_web_identity.go
@@ -25,6 +25,7 @@ import (
 	"io"
 	"net/http"
 	"net/url"
+	"os"
 	"strconv"
 	"strings"
 	"time"
@@ -57,9 +58,10 @@ type WebIdentityResult struct {
 
 // WebIdentityToken - web identity token with expiry.
 type WebIdentityToken struct {
-	Token       string
-	AccessToken string
-	Expiry      int
+	Token        string
+	AccessToken  string
+	RefreshToken string
+	Expiry       int
 }
 
 // A STSWebIdentity retrieves credentials from MinIO service, and keeps track if
@@ -85,29 +87,59 @@ type STSWebIdentity struct {
 	// assuming.
 	RoleARN string
 
+	// Policy is the policy to which the returned credentials should be limited.
+	Policy string
+
 	// roleSessionName is the identifier for the assumed role session.
 	roleSessionName string
 }
 
 // NewSTSWebIdentity returns a pointer to a new
 // Credentials object wrapping the STSWebIdentity.
-func NewSTSWebIdentity(stsEndpoint string, getWebIDTokenExpiry func() (*WebIdentityToken, error)) (*Credentials, error) {
+func NewSTSWebIdentity(stsEndpoint string, getWebIDTokenExpiry func() (*WebIdentityToken, error), opts ...func(*STSWebIdentity)) (*Credentials, error) {
 	if stsEndpoint == "" {
 		return nil, errors.New("STS endpoint cannot be empty")
 	}
 	if getWebIDTokenExpiry == nil {
 		return nil, errors.New("Web ID token and expiry retrieval function should be defined")
 	}
-	return New(&STSWebIdentity{
+	i := &STSWebIdentity{
 		Client: &http.Client{
 			Transport: http.DefaultTransport,
 		},
 		STSEndpoint:         stsEndpoint,
 		GetWebIDTokenExpiry: getWebIDTokenExpiry,
-	}), nil
+	}
+	for _, o := range opts {
+		o(i)
+	}
+	return New(i), nil
+}
+
+// NewKubernetesIdentity returns a pointer to a new Credentials object that
+// uses the Kubernetes service account token file as the web identity token.
+func NewKubernetesIdentity(stsEndpoint string, opts ...func(*STSWebIdentity)) (*Credentials, error) {
+	return NewSTSWebIdentity(stsEndpoint, func() (*WebIdentityToken, error) {
+		token, err := os.ReadFile("/var/run/secrets/kubernetes.io/serviceaccount/token")
+		if err != nil {
+			return nil, err
+		}
+
+		return &WebIdentityToken{
+			Token: string(token),
+		}, nil
+	}, opts...)
+}
+
+// WithPolicy option will enforce that the returned credentials
+// will be scoped down to the specified policy
+func WithPolicy(policy string) func(*STSWebIdentity) {
+	return func(i *STSWebIdentity) {
+		i.Policy = policy
+	}
 }
 
-func getWebIdentityCredentials(clnt *http.Client, endpoint, roleARN, roleSessionName string,
+func getWebIdentityCredentials(clnt *http.Client, endpoint, roleARN, roleSessionName string, policy string,
 	getWebIDTokenExpiry func() (*WebIdentityToken, error),
 ) (AssumeRoleWithWebIdentityResponse, error) {
 	idToken, err := getWebIDTokenExpiry()
@@ -130,9 +162,16 @@ func getWebIdentityCredentials(clnt *http.Client, endpoint, roleARN, roleSession
 		// Usually set when server is using extended userInfo endpoint.
 		v.Set("WebIdentityAccessToken", idToken.AccessToken)
 	}
+	if idToken.RefreshToken != "" {
+		// Usually set when server is using extended userInfo endpoint.
+		v.Set("WebIdentityRefreshToken", idToken.RefreshToken)
+	}
 	if idToken.Expiry > 0 {
 		v.Set("DurationSeconds", fmt.Sprintf("%d", idToken.Expiry))
 	}
+	if policy != "" {
+		v.Set("Policy", policy)
+	}
 	v.Set("Version", STSVersion)
 
 	u, err := url.Parse(endpoint)
@@ -183,7 +222,7 @@ func getWebIdentityCredentials(clnt *http.Client, endpoint, roleARN, roleSession
 // Retrieve retrieves credentials from the MinIO service.
 // Error will be returned if the request fails.
 func (m *STSWebIdentity) Retrieve() (Value, error) {
-	a, err := getWebIdentityCredentials(m.Client, m.STSEndpoint, m.RoleARN, m.roleSessionName, m.GetWebIDTokenExpiry)
+	a, err := getWebIdentityCredentials(m.Client, m.STSEndpoint, m.RoleARN, m.roleSessionName, m.Policy, m.GetWebIDTokenExpiry)
 	if err != nil {
 		return Value{}, err
 	}
diff --git a/vendor/github.com/minio/minio-go/v7/pkg/encrypt/server-side.go b/vendor/github.com/minio/minio-go/v7/pkg/encrypt/server-side.go
index a7081c59..c40e40a1 100644
--- a/vendor/github.com/minio/minio-go/v7/pkg/encrypt/server-side.go
+++ b/vendor/github.com/minio/minio-go/v7/pkg/encrypt/server-side.go
@@ -23,7 +23,7 @@ import (
 	"errors"
 	"net/http"
 
-	jsoniter "github.com/json-iterator/go"
+	"github.com/goccy/go-json"
 	"golang.org/x/crypto/argon2"
 )
 
@@ -101,7 +101,6 @@ func NewSSEKMS(keyID string, context interface{}) (ServerSide, error) {
 	if context == nil {
 		return kms{key: keyID, hasContext: false}, nil
 	}
-	json := jsoniter.ConfigCompatibleWithStandardLibrary
 	serializedContext, err := json.Marshal(context)
 	if err != nil {
 		return nil, err
diff --git a/vendor/github.com/minio/minio-go/v7/pkg/lifecycle/lifecycle.go b/vendor/github.com/minio/minio-go/v7/pkg/lifecycle/lifecycle.go
index 10c95ffe..344af2b7 100644
--- a/vendor/github.com/minio/minio-go/v7/pkg/lifecycle/lifecycle.go
+++ b/vendor/github.com/minio/minio-go/v7/pkg/lifecycle/lifecycle.go
@@ -414,11 +414,54 @@ func (e Expiration) MarshalXML(en *xml.Encoder, startElement xml.StartElement) e
 	return en.EncodeElement(expirationWrapper(e), startElement)
 }
 
+// DelMarkerExpiration represents DelMarkerExpiration actions element in an ILM policy
+type DelMarkerExpiration struct {
+	XMLName xml.Name `xml:"DelMarkerExpiration" json:"-"`
+	Days    int      `xml:"Days,omitempty" json:"Days,omitempty"`
+}
+
+// IsNull returns true if Days isn't specified and false otherwise.
+func (de DelMarkerExpiration) IsNull() bool {
+	return de.Days == 0
+}
+
+// MarshalXML avoids serializing an empty DelMarkerExpiration element
+func (de DelMarkerExpiration) MarshalXML(enc *xml.Encoder, start xml.StartElement) error {
+	if de.IsNull() {
+		return nil
+	}
+	type delMarkerExp DelMarkerExpiration
+	return enc.EncodeElement(delMarkerExp(de), start)
+}
+
+// AllVersionsExpiration represents AllVersionsExpiration actions element in an ILM policy
+type AllVersionsExpiration struct {
+	XMLName      xml.Name           `xml:"AllVersionsExpiration" json:"-"`
+	Days         int                `xml:"Days,omitempty" json:"Days,omitempty"`
+	DeleteMarker ExpireDeleteMarker `xml:"DeleteMarker,omitempty" json:"DeleteMarker,omitempty"`
+}
+
+// IsNull returns true if days field is 0
+func (e AllVersionsExpiration) IsNull() bool {
+	return e.Days == 0
+}
+
+// MarshalXML satisfies xml.Marshaler to provide custom encoding
+func (e AllVersionsExpiration) MarshalXML(enc *xml.Encoder, start xml.StartElement) error {
+	if e.IsNull() {
+		return nil
+	}
+	type allVersionsExp AllVersionsExpiration
+	return enc.EncodeElement(allVersionsExp(e), start)
+}
+
 // MarshalJSON customizes json encoding by omitting empty values
 func (r Rule) MarshalJSON() ([]byte, error) {
 	type rule struct {
 		AbortIncompleteMultipartUpload *AbortIncompleteMultipartUpload `json:"AbortIncompleteMultipartUpload,omitempty"`
 		Expiration                     *Expiration                     `json:"Expiration,omitempty"`
+		DelMarkerExpiration            *DelMarkerExpiration            `json:"DelMarkerExpiration,omitempty"`
+		AllVersionsExpiration          *AllVersionsExpiration          `json:"AllVersionsExpiration,omitempty"`
 		ID                             string                          `json:"ID"`
 		RuleFilter                     *Filter                         `json:"Filter,omitempty"`
 		NoncurrentVersionExpiration    *NoncurrentVersionExpiration    `json:"NoncurrentVersionExpiration,omitempty"`
@@ -442,6 +485,9 @@ func (r Rule) MarshalJSON() ([]byte, error) {
 	if !r.Expiration.IsNull() {
 		newr.Expiration = &r.Expiration
 	}
+	if !r.DelMarkerExpiration.IsNull() {
+		newr.DelMarkerExpiration = &r.DelMarkerExpiration
+	}
 	if !r.Transition.IsNull() {
 		newr.Transition = &r.Transition
 	}
@@ -451,6 +497,9 @@ func (r Rule) MarshalJSON() ([]byte, error) {
 	if !r.NoncurrentVersionTransition.isNull() {
 		newr.NoncurrentVersionTransition = &r.NoncurrentVersionTransition
 	}
+	if !r.AllVersionsExpiration.IsNull() {
+		newr.AllVersionsExpiration = &r.AllVersionsExpiration
+	}
 
 	return json.Marshal(newr)
 }
@@ -460,6 +509,8 @@ type Rule struct {
 	XMLName                        xml.Name                       `xml:"Rule,omitempty" json:"-"`
 	AbortIncompleteMultipartUpload AbortIncompleteMultipartUpload `xml:"AbortIncompleteMultipartUpload,omitempty" json:"AbortIncompleteMultipartUpload,omitempty"`
 	Expiration                     Expiration                     `xml:"Expiration,omitempty" json:"Expiration,omitempty"`
+	DelMarkerExpiration            DelMarkerExpiration            `xml:"DelMarkerExpiration,omitempty" json:"DelMarkerExpiration,omitempty"`
+	AllVersionsExpiration          AllVersionsExpiration          `xml:"AllVersionsExpiration,omitempty" json:"AllVersionsExpiration,omitempty"`
 	ID                             string                         `xml:"ID" json:"ID"`
 	RuleFilter                     Filter                         `xml:"Filter,omitempty" json:"Filter,omitempty"`
 	NoncurrentVersionExpiration    NoncurrentVersionExpiration    `xml:"NoncurrentVersionExpiration,omitempty"  json:"NoncurrentVersionExpiration,omitempty"`
diff --git a/vendor/github.com/minio/minio-go/v7/pkg/notification/notification.go b/vendor/github.com/minio/minio-go/v7/pkg/notification/notification.go
index a44799d2..151ca21e 100644
--- a/vendor/github.com/minio/minio-go/v7/pkg/notification/notification.go
+++ b/vendor/github.com/minio/minio-go/v7/pkg/notification/notification.go
@@ -50,6 +50,7 @@ const (
 	ObjectRemovedAll                                   EventType = "s3:ObjectRemoved:*"
 	ObjectRemovedDelete                                EventType = "s3:ObjectRemoved:Delete"
 	ObjectRemovedDeleteMarkerCreated                   EventType = "s3:ObjectRemoved:DeleteMarkerCreated"
+	ILMDelMarkerExpirationDelete                       EventType = "s3:LifecycleDelMarkerExpiration:Delete"
 	ObjectReducedRedundancyLostObject                  EventType = "s3:ReducedRedundancyLostObject"
 	ObjectTransitionAll                                EventType = "s3:ObjectTransition:*"
 	ObjectTransitionFailed                             EventType = "s3:ObjectTransition:Failed"
diff --git a/vendor/github.com/minio/minio-go/v7/pkg/replication/replication.go b/vendor/github.com/minio/minio-go/v7/pkg/replication/replication.go
index 0abbf6ef..65a2f75e 100644
--- a/vendor/github.com/minio/minio-go/v7/pkg/replication/replication.go
+++ b/vendor/github.com/minio/minio-go/v7/pkg/replication/replication.go
@@ -406,6 +406,9 @@ func (c *Config) EditRule(opts Options) error {
 			return fmt.Errorf("priority must be unique. Replication configuration already has a rule with this priority")
 		}
 		if rule.Destination.Bucket != newRule.Destination.Bucket && rule.ID == newRule.ID {
+			if c.Role == newRule.Destination.Bucket {
+				continue
+			}
 			return fmt.Errorf("invalid destination bucket for this rule")
 		}
 	}
diff --git a/vendor/github.com/minio/minio-go/v7/pkg/s3utils/utils.go b/vendor/github.com/minio/minio-go/v7/pkg/s3utils/utils.go
index 056e78a6..0e63ce2f 100644
--- a/vendor/github.com/minio/minio-go/v7/pkg/s3utils/utils.go
+++ b/vendor/github.com/minio/minio-go/v7/pkg/s3utils/utils.go
@@ -226,7 +226,7 @@ func IsGoogleEndpoint(endpointURL url.URL) bool {
 	if endpointURL == sentinelURL {
 		return false
 	}
-	return endpointURL.Host == "storage.googleapis.com"
+	return endpointURL.Hostname() == "storage.googleapis.com"
 }
 
 // Expects ascii encoded strings - from output of urlEncodePath
diff --git a/vendor/github.com/minio/minio-go/v7/pkg/set/stringset.go b/vendor/github.com/minio/minio-go/v7/pkg/set/stringset.go
index 2566a3df..c265ce57 100644
--- a/vendor/github.com/minio/minio-go/v7/pkg/set/stringset.go
+++ b/vendor/github.com/minio/minio-go/v7/pkg/set/stringset.go
@@ -21,14 +21,12 @@ import (
 	"fmt"
 	"sort"
 
-	jsoniter "github.com/json-iterator/go"
+	"github.com/goccy/go-json"
 )
 
 // StringSet - uses map as set of strings.
 type StringSet map[string]struct{}
 
-var json = jsoniter.ConfigCompatibleWithStandardLibrary
-
 // ToSlice - returns StringSet as string slice.
 func (set StringSet) ToSlice() []string {
 	keys := make([]string, 0, len(set))
diff --git a/vendor/github.com/minio/minio-go/v7/pkg/tags/tags.go b/vendor/github.com/minio/minio-go/v7/pkg/tags/tags.go
index 7a84a6f3..33465c63 100644
--- a/vendor/github.com/minio/minio-go/v7/pkg/tags/tags.go
+++ b/vendor/github.com/minio/minio-go/v7/pkg/tags/tags.go
@@ -69,7 +69,7 @@ const (
 // https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/Using_Tags.html#tag-restrictions
 // borrowed from this article and also testing various ASCII characters following regex
 // is supported by AWS S3 for both tags and values.
-var validTagKeyValue = regexp.MustCompile(`^[a-zA-Z0-9-+\-._:/@ ]+$`)
+var validTagKeyValue = regexp.MustCompile(`^[a-zA-Z0-9-+\-._:/@ =]+$`)
 
 func checkKey(key string) error {
 	if len(key) == 0 {
diff --git a/vendor/github.com/minio/minio-go/v7/post-policy.go b/vendor/github.com/minio/minio-go/v7/post-policy.go
index 3f4881e8..26bf441b 100644
--- a/vendor/github.com/minio/minio-go/v7/post-policy.go
+++ b/vendor/github.com/minio/minio-go/v7/post-policy.go
@@ -19,12 +19,14 @@ package minio
 
 import (
 	"encoding/base64"
+	"errors"
 	"fmt"
 	"net/http"
 	"strings"
 	"time"
 
 	"github.com/minio/minio-go/v7/pkg/encrypt"
+	"github.com/minio/minio-go/v7/pkg/tags"
 )
 
 // expirationDateFormat date format for expiration key in json policy.
@@ -83,7 +85,7 @@ func (p *PostPolicy) SetExpires(t time.Time) error {
 
 // SetKey - Sets an object name for the policy based upload.
 func (p *PostPolicy) SetKey(key string) error {
-	if strings.TrimSpace(key) == "" || key == "" {
+	if strings.TrimSpace(key) == "" {
 		return errInvalidArgument("Object name is empty.")
 	}
 	policyCond := policyCondition{
@@ -116,7 +118,7 @@ func (p *PostPolicy) SetKeyStartsWith(keyStartsWith string) error {
 
 // SetBucket - Sets bucket at which objects will be uploaded to.
 func (p *PostPolicy) SetBucket(bucketName string) error {
-	if strings.TrimSpace(bucketName) == "" || bucketName == "" {
+	if strings.TrimSpace(bucketName) == "" {
 		return errInvalidArgument("Bucket name is empty.")
 	}
 	policyCond := policyCondition{
@@ -133,7 +135,7 @@ func (p *PostPolicy) SetBucket(bucketName string) error {
 
 // SetCondition - Sets condition for credentials, date and algorithm
 func (p *PostPolicy) SetCondition(matchType, condition, value string) error {
-	if strings.TrimSpace(value) == "" || value == "" {
+	if strings.TrimSpace(value) == "" {
 		return errInvalidArgument("No value specified for condition")
 	}
 
@@ -152,10 +154,31 @@ func (p *PostPolicy) SetCondition(matchType, condition, value string) error {
 	return errInvalidArgument("Invalid condition in policy")
 }
 
+// SetTagging - Sets tagging for the object for this policy based upload.
+func (p *PostPolicy) SetTagging(tagging string) error {
+	if strings.TrimSpace(tagging) == "" {
+		return errInvalidArgument("No tagging specified.")
+	}
+	_, err := tags.ParseObjectXML(strings.NewReader(tagging))
+	if err != nil {
+		return errors.New("The XML you provided was not well-formed or did not validate against our published schema.") //nolint
+	}
+	policyCond := policyCondition{
+		matchType: "eq",
+		condition: "$tagging",
+		value:     tagging,
+	}
+	if err := p.addNewPolicy(policyCond); err != nil {
+		return err
+	}
+	p.formData["tagging"] = tagging
+	return nil
+}
+
 // SetContentType - Sets content-type of the object for this policy
 // based upload.
 func (p *PostPolicy) SetContentType(contentType string) error {
-	if strings.TrimSpace(contentType) == "" || contentType == "" {
+	if strings.TrimSpace(contentType) == "" {
 		return errInvalidArgument("No content type specified.")
 	}
 	policyCond := policyCondition{
@@ -186,27 +209,61 @@ func (p *PostPolicy) SetContentTypeStartsWith(contentTypeStartsWith string) erro
 	return nil
 }
 
+// SetContentDisposition - Sets content-disposition of the object for this policy based upload.
+func (p *PostPolicy) SetContentDisposition(contentDisposition string) error {
+	if strings.TrimSpace(contentDisposition) == "" {
+		return errInvalidArgument("No content disposition specified.")
+	}
+	policyCond := policyCondition{
+		matchType: "eq",
+		condition: "$Content-Disposition",
+		value:     contentDisposition,
+	}
+	if err := p.addNewPolicy(policyCond); err != nil {
+		return err
+	}
+	p.formData["Content-Disposition"] = contentDisposition
+	return nil
+}
+
+// SetContentEncoding - Sets content-encoding of the object for this policy based upload.
+func (p *PostPolicy) SetContentEncoding(contentEncoding string) error {
+	if strings.TrimSpace(contentEncoding) == "" {
+		return errInvalidArgument("No content encoding specified.")
+	}
+	policyCond := policyCondition{
+		matchType: "eq",
+		condition: "$Content-Encoding",
+		value:     contentEncoding,
+	}
+	if err := p.addNewPolicy(policyCond); err != nil {
+		return err
+	}
+	p.formData["Content-Encoding"] = contentEncoding
+	return nil
+}
+
 // SetContentLengthRange - Set new min and max content length
 // condition for all incoming uploads.
-func (p *PostPolicy) SetContentLengthRange(min, max int64) error {
-	if min > max {
+func (p *PostPolicy) SetContentLengthRange(minLen, maxLen int64) error {
+	if minLen > maxLen {
 		return errInvalidArgument("Minimum limit is larger than maximum limit.")
 	}
-	if min < 0 {
+	if minLen < 0 {
 		return errInvalidArgument("Minimum limit cannot be negative.")
 	}
-	if max <= 0 {
+	if maxLen <= 0 {
 		return errInvalidArgument("Maximum limit cannot be non-positive.")
 	}
-	p.contentLengthRange.min = min
-	p.contentLengthRange.max = max
+	p.contentLengthRange.min = minLen
+	p.contentLengthRange.max = maxLen
 	return nil
 }
 
 // SetSuccessActionRedirect - Sets the redirect success url of the object for this policy
 // based upload.
 func (p *PostPolicy) SetSuccessActionRedirect(redirect string) error {
-	if strings.TrimSpace(redirect) == "" || redirect == "" {
+	if strings.TrimSpace(redirect) == "" {
 		return errInvalidArgument("Redirect is empty")
 	}
 	policyCond := policyCondition{
@@ -224,7 +281,7 @@ func (p *PostPolicy) SetSuccessActionRedirect(redirect string) error {
 // SetSuccessStatusAction - Sets the status success code of the object for this policy
 // based upload.
 func (p *PostPolicy) SetSuccessStatusAction(status string) error {
-	if strings.TrimSpace(status) == "" || status == "" {
+	if strings.TrimSpace(status) == "" {
 		return errInvalidArgument("Status is empty")
 	}
 	policyCond := policyCondition{
@@ -242,10 +299,10 @@ func (p *PostPolicy) SetSuccessStatusAction(status string) error {
 // SetUserMetadata - Set user metadata as a key/value couple.
 // Can be retrieved through a HEAD request or an event.
 func (p *PostPolicy) SetUserMetadata(key, value string) error {
-	if strings.TrimSpace(key) == "" || key == "" {
+	if strings.TrimSpace(key) == "" {
 		return errInvalidArgument("Key is empty")
 	}
-	if strings.TrimSpace(value) == "" || value == "" {
+	if strings.TrimSpace(value) == "" {
 		return errInvalidArgument("Value is empty")
 	}
 	headerName := fmt.Sprintf("x-amz-meta-%s", key)
@@ -261,12 +318,49 @@ func (p *PostPolicy) SetUserMetadata(key, value string) error {
 	return nil
 }
 
+// SetUserMetadataStartsWith - Sets the prefix that a user metadata value must start with.
+// Can be retrieved through a HEAD request or an event.
+func (p *PostPolicy) SetUserMetadataStartsWith(key, value string) error {
+	if strings.TrimSpace(key) == "" {
+		return errInvalidArgument("Key is empty")
+	}
+	headerName := fmt.Sprintf("x-amz-meta-%s", key)
+	policyCond := policyCondition{
+		matchType: "starts-with",
+		condition: fmt.Sprintf("$%s", headerName),
+		value:     value,
+	}
+	if err := p.addNewPolicy(policyCond); err != nil {
+		return err
+	}
+	p.formData[headerName] = value
+	return nil
+}
+
 // SetChecksum sets the checksum of the request.
-func (p *PostPolicy) SetChecksum(c Checksum) {
+func (p *PostPolicy) SetChecksum(c Checksum) error {
 	if c.IsSet() {
 		p.formData[amzChecksumAlgo] = c.Type.String()
 		p.formData[c.Type.Key()] = c.Encoded()
+
+		policyCond := policyCondition{
+			matchType: "eq",
+			condition: fmt.Sprintf("$%s", amzChecksumAlgo),
+			value:     c.Type.String(),
+		}
+		if err := p.addNewPolicy(policyCond); err != nil {
+			return err
+		}
+		policyCond = policyCondition{
+			matchType: "eq",
+			condition: fmt.Sprintf("$%s", c.Type.Key()),
+			value:     c.Encoded(),
+		}
+		if err := p.addNewPolicy(policyCond); err != nil {
+			return err
+		}
 	}
+	return nil
 }
 
 // SetEncryption - sets encryption headers for POST API
diff --git a/vendor/github.com/minio/minio-go/v7/retry-continous.go b/vendor/github.com/minio/minio-go/v7/retry-continous.go
index bfeea95f..81fcf16f 100644
--- a/vendor/github.com/minio/minio-go/v7/retry-continous.go
+++ b/vendor/github.com/minio/minio-go/v7/retry-continous.go
@@ -20,7 +20,7 @@ package minio
 import "time"
 
 // newRetryTimerContinous creates a timer with exponentially increasing delays forever.
-func (c *Client) newRetryTimerContinous(unit, cap time.Duration, jitter float64, doneCh chan struct{}) <-chan int {
+func (c *Client) newRetryTimerContinous(baseSleep, maxSleep time.Duration, jitter float64, doneCh chan struct{}) <-chan int {
 	attemptCh := make(chan int)
 
 	// normalize jitter to the range [0, 1.0]
@@ -39,10 +39,10 @@ func (c *Client) newRetryTimerContinous(unit, cap time.Duration, jitter float64,
 		if attempt > maxAttempt {
 			attempt = maxAttempt
 		}
-		// sleep = random_between(0, min(cap, base * 2 ** attempt))
-		sleep := unit * time.Duration(1<<uint(attempt))
-		if sleep > cap {
-			sleep = cap
+		// sleep = random_between(0, min(maxSleep, base * 2 ** attempt))
+		sleep := baseSleep * time.Duration(1<<uint(attempt))
+		if sleep > maxSleep {
+			sleep = maxSleep
 		}
 		if jitter != NoJitter {
 			sleep -= time.Duration(c.random.Float64() * float64(sleep) * jitter)
diff --git a/vendor/github.com/minio/minio-go/v7/retry.go b/vendor/github.com/minio/minio-go/v7/retry.go
index 5ddcad89..4cc45920 100644
--- a/vendor/github.com/minio/minio-go/v7/retry.go
+++ b/vendor/github.com/minio/minio-go/v7/retry.go
@@ -45,7 +45,7 @@ var DefaultRetryCap = time.Second
 
 // newRetryTimer creates a timer with exponentially increasing
 // delays until the maximum retry attempts are reached.
-func (c *Client) newRetryTimer(ctx context.Context, maxRetry int, unit, cap time.Duration, jitter float64) <-chan int {
+func (c *Client) newRetryTimer(ctx context.Context, maxRetry int, baseSleep, maxSleep time.Duration, jitter float64) <-chan int {
 	attemptCh := make(chan int)
 
 	// computes the exponential backoff duration according to
@@ -59,10 +59,10 @@ func (c *Client) newRetryTimer(ctx context.Context, maxRetry int, unit, cap time
 			jitter = MaxJitter
 		}
 
-		// sleep = random_between(0, min(cap, base * 2 ** attempt))
-		sleep := unit * time.Duration(1<<uint(attempt))
-		if sleep > cap {
-			sleep = cap
+		// sleep = random_between(0, min(maxSleep, base * 2 ** attempt))
+		sleep := baseSleep * time.Duration(1<<uint(attempt))
+		if sleep > maxSleep {
+			sleep = maxSleep
 		}
 		if jitter != NoJitter {
 			sleep -= time.Duration(c.random.Float64() * float64(sleep) * jitter)
@@ -129,9 +129,10 @@ func isHTTPStatusRetryable(httpStatusCode int) (ok bool) {
 }
 
 // For now, all http Do() requests are retriable except some well defined errors
-func isRequestErrorRetryable(err error) bool {
+func isRequestErrorRetryable(ctx context.Context, err error) bool {
 	if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {
-		return false
+		// Retry if internal timeout in the HTTP call.
+		return ctx.Err() == nil
 	}
 	if ue, ok := err.(*url.Error); ok {
 		e := ue.Unwrap()
diff --git a/vendor/github.com/minio/minio-go/v7/s3-endpoints.go b/vendor/github.com/minio/minio-go/v7/s3-endpoints.go
index 068a6bfa..01cee8a1 100644
--- a/vendor/github.com/minio/minio-go/v7/s3-endpoints.go
+++ b/vendor/github.com/minio/minio-go/v7/s3-endpoints.go
@@ -44,6 +44,10 @@ var awsS3EndpointMap = map[string]awsS3Endpoint{
 		"s3.ca-central-1.amazonaws.com",
 		"s3.dualstack.ca-central-1.amazonaws.com",
 	},
+	"ca-west-1": {
+		"s3.ca-west-1.amazonaws.com",
+		"s3.dualstack.ca-west-1.amazonaws.com",
+	},
 	"eu-west-1": {
 		"s3.eu-west-1.amazonaws.com",
 		"s3.dualstack.eu-west-1.amazonaws.com",
diff --git a/vendor/github.com/minio/minio-go/v7/s3-error.go b/vendor/github.com/minio/minio-go/v7/s3-error.go
index f365157e..f7fad19f 100644
--- a/vendor/github.com/minio/minio-go/v7/s3-error.go
+++ b/vendor/github.com/minio/minio-go/v7/s3-error.go
@@ -57,5 +57,6 @@ var s3ErrorResponseMap = map[string]string{
 	"BucketAlreadyOwnedByYou":           "Your previous request to create the named bucket succeeded and you already own it.",
 	"InvalidDuration":                   "Duration provided in the request is invalid.",
 	"XAmzContentSHA256Mismatch":         "The provided 'x-amz-content-sha256' header does not match what was computed.",
+	"NoSuchCORSConfiguration":           "The specified bucket does not have a CORS configuration.",
 	// Add new API errors here.
 }
diff --git a/vendor/github.com/minio/minio-go/v7/utils.go b/vendor/github.com/minio/minio-go/v7/utils.go
index d68f1484..cd7d2c27 100644
--- a/vendor/github.com/minio/minio-go/v7/utils.go
+++ b/vendor/github.com/minio/minio-go/v7/utils.go
@@ -20,7 +20,7 @@ package minio
 import (
 	"context"
 	"crypto/md5"
-	fipssha256 "crypto/sha256"
+	"crypto/sha256"
 	"crypto/tls"
 	"encoding/base64"
 	"encoding/hex"
@@ -40,9 +40,7 @@ import (
 	"time"
 
 	md5simd "github.com/minio/md5-simd"
-	"github.com/minio/minio-go/v7/pkg/encrypt"
 	"github.com/minio/minio-go/v7/pkg/s3utils"
-	"github.com/minio/sha256-simd"
 )
 
 func trimEtag(etag string) string {
@@ -380,10 +378,11 @@ func ToObjectInfo(bucketName, objectName string, h http.Header) (ObjectInfo, err
 		Restore:      restore,
 
 		// Checksum values
-		ChecksumCRC32:  h.Get("x-amz-checksum-crc32"),
-		ChecksumCRC32C: h.Get("x-amz-checksum-crc32c"),
-		ChecksumSHA1:   h.Get("x-amz-checksum-sha1"),
-		ChecksumSHA256: h.Get("x-amz-checksum-sha256"),
+		ChecksumCRC32:     h.Get(ChecksumCRC32.Key()),
+		ChecksumCRC32C:    h.Get(ChecksumCRC32C.Key()),
+		ChecksumSHA1:      h.Get(ChecksumSHA1.Key()),
+		ChecksumSHA256:    h.Get(ChecksumSHA256.Key()),
+		ChecksumCRC64NVME: h.Get(ChecksumCRC64NVME.Key()),
 	}, nil
 }
 
@@ -512,19 +511,9 @@ func isAmzHeader(headerKey string) bool {
 	return strings.HasPrefix(key, "x-amz-meta-") || strings.HasPrefix(key, "x-amz-grant-") || key == "x-amz-acl" || isSSEHeader(headerKey) || strings.HasPrefix(key, "x-amz-checksum-")
 }
 
-var supportedReplicationEncryptionHeaders = map[string]bool{
-	"x-minio-replication-server-side-encryption-sealed-key":     true,
-	"x-minio-replication-server-side-encryption-seal-algorithm": true,
-	"x-minio-replication-server-side-encryption-iv":             true,
-	"x-minio-replication-encrypted-multipart":                   true,
-	"x-minio-replication-actual-object-size":                    true,
-	// Add more supported headers here.
-	// Must be lower case.
-}
-
-// isValidReplicationEncryptionHeader returns true if header is one of valid replication encryption headers
-func isValidReplicationEncryptionHeader(headerKey string) bool {
-	return supportedReplicationEncryptionHeaders[strings.ToLower(headerKey)]
+// isMinioHeader returns true if header is x-minio- header.
+func isMinioHeader(headerKey string) bool {
+	return strings.HasPrefix(strings.ToLower(headerKey), "x-minio-")
 }
 
 // supportedQueryValues is a list of query strings that can be passed in when using GetObject.
@@ -563,9 +552,6 @@ func newMd5Hasher() md5simd.Hasher {
 }
 
 func newSHA256Hasher() md5simd.Hasher {
-	if encrypt.FIPS {
-		return &hashWrapper{Hash: fipssha256.New(), isSHA256: true}
-	}
 	return &hashWrapper{Hash: sha256Pool.Get().(hash.Hash), isSHA256: true}
 }
 
@@ -713,3 +699,146 @@ func (h *hashReaderWrapper) Read(p []byte) (n int, err error) {
 	}
 	return n, err
 }
+
+// Following is ported from C to Go in 2016 by Justin Ruggles, with minimal alteration.
+// Used uint for unsigned long. Used uint32 for input arguments in order to match
+// the Go hash/crc32 package. zlib CRC32 combine (https://github.com/madler/zlib)
+// Modified for hash/crc64 by Klaus Post, 2024.
+func gf2MatrixTimes(mat []uint64, vec uint64) uint64 {
+	var sum uint64
+
+	for vec != 0 {
+		if vec&1 != 0 {
+			sum ^= mat[0]
+		}
+		vec >>= 1
+		mat = mat[1:]
+	}
+	return sum
+}
+
+func gf2MatrixSquare(square, mat []uint64) {
+	if len(square) != len(mat) {
+		panic("square matrix size mismatch")
+	}
+	for n := range mat {
+		square[n] = gf2MatrixTimes(mat, mat[n])
+	}
+}
+
+// crc32Combine returns the combined CRC-32 hash value of the two passed CRC-32
+// hash values crc1 and crc2. poly represents the generator polynomial
+// and len2 specifies the byte length that the crc2 hash covers.
+func crc32Combine(poly uint32, crc1, crc2 uint32, len2 int64) uint32 {
+	// degenerate case (also disallow negative lengths)
+	if len2 <= 0 {
+		return crc1
+	}
+
+	even := make([]uint64, 32) // even-power-of-two zeros operator
+	odd := make([]uint64, 32)  // odd-power-of-two zeros operator
+
+	// put operator for one zero bit in odd
+	odd[0] = uint64(poly) // CRC-32 polynomial
+	row := uint64(1)
+	for n := 1; n < 32; n++ {
+		odd[n] = row
+		row <<= 1
+	}
+
+	// put operator for two zero bits in even
+	gf2MatrixSquare(even, odd)
+
+	// put operator for four zero bits in odd
+	gf2MatrixSquare(odd, even)
+
+	// apply len2 zeros to crc1 (first square will put the operator for one
+	// zero byte, eight zero bits, in even)
+	crc1n := uint64(crc1)
+	for {
+		// apply zeros operator for this bit of len2
+		gf2MatrixSquare(even, odd)
+		if len2&1 != 0 {
+			crc1n = gf2MatrixTimes(even, crc1n)
+		}
+		len2 >>= 1
+
+		// if no more bits set, then done
+		if len2 == 0 {
+			break
+		}
+
+		// another iteration of the loop with odd and even swapped
+		gf2MatrixSquare(odd, even)
+		if len2&1 != 0 {
+			crc1n = gf2MatrixTimes(odd, crc1n)
+		}
+		len2 >>= 1
+
+		// if no more bits set, then done
+		if len2 == 0 {
+			break
+		}
+	}
+
+	// return combined crc
+	crc1n ^= uint64(crc2)
+	return uint32(crc1n)
+}
+
+func crc64Combine(poly uint64, crc1, crc2 uint64, len2 int64) uint64 {
+	// degenerate case (also disallow negative lengths)
+	if len2 <= 0 {
+		return crc1
+	}
+
+	even := make([]uint64, 64) // even-power-of-two zeros operator
+	odd := make([]uint64, 64)  // odd-power-of-two zeros operator
+
+	// put operator for one zero bit in odd
+	odd[0] = poly // CRC-64 polynomial
+	row := uint64(1)
+	for n := 1; n < 64; n++ {
+		odd[n] = row
+		row <<= 1
+	}
+
+	// put operator for two zero bits in even
+	gf2MatrixSquare(even, odd)
+
+	// put operator for four zero bits in odd
+	gf2MatrixSquare(odd, even)
+
+	// apply len2 zeros to crc1 (first square will put the operator for one
+	// zero byte, eight zero bits, in even)
+	crc1n := crc1
+	for {
+		// apply zeros operator for this bit of len2
+		gf2MatrixSquare(even, odd)
+		if len2&1 != 0 {
+			crc1n = gf2MatrixTimes(even, crc1n)
+		}
+		len2 >>= 1
+
+		// if no more bits set, then done
+		if len2 == 0 {
+			break
+		}
+
+		// another iteration of the loop with odd and even swapped
+		gf2MatrixSquare(odd, even)
+		if len2&1 != 0 {
+			crc1n = gf2MatrixTimes(odd, crc1n)
+		}
+		len2 >>= 1
+
+		// if no more bits set, then done
+		if len2 == 0 {
+			break
+		}
+	}
+
+	// return combined crc
+	crc1n ^= crc2
+	return crc1n
+}
diff --git a/vendor/github.com/minio/sha256-simd/.gitignore b/vendor/github.com/minio/sha256-simd/.gitignore
deleted file mode 100644
index c56069fe..00000000
--- a/vendor/github.com/minio/sha256-simd/.gitignore
+++ /dev/null
@@ -1 +0,0 @@
-*.test
\ No newline at end of file
diff --git a/vendor/github.com/minio/sha256-simd/LICENSE b/vendor/github.com/minio/sha256-simd/LICENSE
deleted file mode 100644
index d6456956..00000000
--- a/vendor/github.com/minio/sha256-simd/LICENSE
+++ /dev/null
@@ -1,202 +0,0 @@
-
-                                 Apache License
-                           Version 2.0, January 2004
-                        http://www.apache.org/licenses/
-
-   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
-
-   1. Definitions.
-
-      "License" shall mean the terms and conditions for use, reproduction,
-      and distribution as defined by Sections 1 through 9 of this document.
-
-      "Licensor" shall mean the copyright owner or entity authorized by
-      the copyright owner that is granting the License.
-
-      "Legal Entity" shall mean the union of the acting entity and all
-      other entities that control, are controlled by, or are under common
-      control with that entity. For the purposes of this definition,
-      "control" means (i) the power, direct or indirect, to cause the
-      direction or management of such entity, whether by contract or
-      otherwise, or (ii) ownership of fifty percent (50%) or more of the
-      outstanding shares, or (iii) beneficial ownership of such entity.
-
-      "You" (or "Your") shall mean an individual or Legal Entity
-      exercising permissions granted by this License.
-
-      "Source" form shall mean the preferred form for making modifications,
-      including but not limited to software source code, documentation
-      source, and configuration files.
-
-      "Object" form shall mean any form resulting from mechanical
-      transformation or translation of a Source form, including but
-      not limited to compiled object code, generated documentation,
-      and conversions to other media types.
-
-      "Work" shall mean the work of authorship, whether in Source or
-      Object form, made available under the License, as indicated by a
-      copyright notice that is included in or attached to the work
-      (an example is provided in the Appendix below).
-
-      "Derivative Works" shall mean any work, whether in Source or Object
-      form, that is based on (or derived from) the Work and for which the
-      editorial revisions, annotations, elaborations, or other modifications
-      represent, as a whole, an original work of authorship. For the purposes
-      of this License, Derivative Works shall not include works that remain
-      separable from, or merely link (or bind by name) to the interfaces of,
-      the Work and Derivative Works thereof.
-
-      "Contribution" shall mean any work of authorship, including
-      the original version of the Work and any modifications or additions
-      to that Work or Derivative Works thereof, that is intentionally
-      submitted to Licensor for inclusion in the Work by the copyright owner
-      or by an individual or Legal Entity authorized to submit on behalf of
-      the copyright owner. For the purposes of this definition, "submitted"
-      means any form of electronic, verbal, or written communication sent
-      to the Licensor or its representatives, including but not limited to
-      communication on electronic mailing lists, source code control systems,
-      and issue tracking systems that are managed by, or on behalf of, the
-      Licensor for the purpose of discussing and improving the Work, but
-      excluding communication that is conspicuously marked or otherwise
-      designated in writing by the copyright owner as "Not a Contribution."
-
-      "Contributor" shall mean Licensor and any individual or Legal Entity
-      on behalf of whom a Contribution has been received by Licensor and
-      subsequently incorporated within the Work.
-
-   2. Grant of Copyright License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      copyright license to reproduce, prepare Derivative Works of,
-      publicly display, publicly perform, sublicense, and distribute the
-      Work and such Derivative Works in Source or Object form.
-
-   3. Grant of Patent License. Subject to the terms and conditions of
-      this License, each Contributor hereby grants to You a perpetual,
-      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
-      (except as stated in this section) patent license to make, have made,
-      use, offer to sell, sell, import, and otherwise transfer the Work,
-      where such license applies only to those patent claims licensable
-      by such Contributor that are necessarily infringed by their
-      Contribution(s) alone or by combination of their Contribution(s)
-      with the Work to which such Contribution(s) was submitted. If You
-      institute patent litigation against any entity (including a
-      cross-claim or counterclaim in a lawsuit) alleging that the Work
-      or a Contribution incorporated within the Work constitutes direct
-      or contributory patent infringement, then any patent licenses
-      granted to You under this License for that Work shall terminate
-      as of the date such litigation is filed.
-
-   4. Redistribution. You may reproduce and distribute copies of the
-      Work or Derivative Works thereof in any medium, with or without
-      modifications, and in Source or Object form, provided that You
-      meet the following conditions:
-
-      (a) You must give any other recipients of the Work or
-          Derivative Works a copy of this License; and
-
-      (b) You must cause any modified files to carry prominent notices
-          stating that You changed the files; and
-
-      (c) You must retain, in the Source form of any Derivative Works
-          that You distribute, all copyright, patent, trademark, and
-          attribution notices from the Source form of the Work,
-          excluding those notices that do not pertain to any part of
-          the Derivative Works; and
-
-      (d) If the Work includes a "NOTICE" text file as part of its
-          distribution, then any Derivative Works that You distribute must
-          include a readable copy of the attribution notices contained
-          within such NOTICE file, excluding those notices that do not
-          pertain to any part of the Derivative Works, in at least one
-          of the following places: within a NOTICE text file distributed
-          as part of the Derivative Works; within the Source form or
-          documentation, if provided along with the Derivative Works; or,
-          within a display generated by the Derivative Works, if and
-          wherever such third-party notices normally appear. The contents
-          of the NOTICE file are for informational purposes only and
-          do not modify the License. You may add Your own attribution
-          notices within Derivative Works that You distribute, alongside
-          or as an addendum to the NOTICE text from the Work, provided
-          that such additional attribution notices cannot be construed
-          as modifying the License.
-
-      You may add Your own copyright statement to Your modifications and
-      may provide additional or different license terms and conditions
-      for use, reproduction, or distribution of Your modifications, or
-      for any such Derivative Works as a whole, provided Your use,
-      reproduction, and distribution of the Work otherwise complies with
-      the conditions stated in this License.
-
-   5. Submission of Contributions. Unless You explicitly state otherwise,
-      any Contribution intentionally submitted for inclusion in the Work
-      by You to the Licensor shall be under the terms and conditions of
-      this License, without any additional terms or conditions.
-      Notwithstanding the above, nothing herein shall supersede or modify
-      the terms of any separate license agreement you may have executed
-      with Licensor regarding such Contributions.
-
-   6. Trademarks. This License does not grant permission to use the trade
-      names, trademarks, service marks, or product names of the Licensor,
-      except as required for reasonable and customary use in describing the
-      origin of the Work and reproducing the content of the NOTICE file.
-
-   7. Disclaimer of Warranty. Unless required by applicable law or
-      agreed to in writing, Licensor provides the Work (and each
-      Contributor provides its Contributions) on an "AS IS" BASIS,
-      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
-      implied, including, without limitation, any warranties or conditions
-      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
-      PARTICULAR PURPOSE. You are solely responsible for determining the
-      appropriateness of using or redistributing the Work and assume any
-      risks associated with Your exercise of permissions under this License.
-
-   8. Limitation of Liability. In no event and under no legal theory,
-      whether in tort (including negligence), contract, or otherwise,
-      unless required by applicable law (such as deliberate and grossly
-      negligent acts) or agreed to in writing, shall any Contributor be
-      liable to You for damages, including any direct, indirect, special,
-      incidental, or consequential damages of any character arising as a
-      result of this License or out of the use or inability to use the
-      Work (including but not limited to damages for loss of goodwill,
-      work stoppage, computer failure or malfunction, or any and all
-      other commercial damages or losses), even if such Contributor
-      has been advised of the possibility of such damages.
-
-   9. Accepting Warranty or Additional Liability. While redistributing
-      the Work or Derivative Works thereof, You may choose to offer,
-      and charge a fee for, acceptance of support, warranty, indemnity,
-      or other liability obligations and/or rights consistent with this
-      License. However, in accepting such obligations, You may act only
-      on Your own behalf and on Your sole responsibility, not on behalf
-      of any other Contributor, and only if You agree to indemnify,
-      defend, and hold each Contributor harmless for any liability
-      incurred by, or claims asserted against, such Contributor by reason
-      of your accepting any such warranty or additional liability.
-
-   END OF TERMS AND CONDITIONS
-
-   APPENDIX: How to apply the Apache License to your work.
-
-      To apply the Apache License to your work, attach the following
-      boilerplate notice, with the fields enclosed by brackets "[]"
-      replaced with your own identifying information. (Don't include
-      the brackets!)  The text should be enclosed in the appropriate
-      comment syntax for the file format. We also recommend that a
-      file or class name and description of purpose be included on the
-      same "printed page" as the copyright notice for easier
-      identification within third-party archives.
-
-   Copyright [yyyy] [name of copyright owner]
-
-   Licensed under the Apache License, Version 2.0 (the "License");
-   you may not use this file except in compliance with the License.
-   You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
diff --git a/vendor/github.com/minio/sha256-simd/README.md b/vendor/github.com/minio/sha256-simd/README.md
deleted file mode 100644
index 6117488d..00000000
--- a/vendor/github.com/minio/sha256-simd/README.md
+++ /dev/null
@@ -1,137 +0,0 @@
-# sha256-simd
-
-Accelerate SHA256 computations in pure Go using AVX512, SHA Extensions for x86 and ARM64 for ARM. 
-On AVX512 it provides an up to 8x improvement (over 3 GB/s per core).
-SHA Extensions give a performance boost of close to 4x over native.
-
-## Introduction
-
-This package is designed as a replacement for `crypto/sha256`. 
-For ARM CPUs with the Cryptography Extensions, advantage is taken of the SHA2 instructions resulting in a massive performance improvement.
-
-This package uses Golang assembly. 
-The AVX512 version is based on the Intel's "multi-buffer crypto library for IPSec" whereas the other Intel implementations are described in "Fast SHA-256 Implementations on Intel Architecture Processors" by J. Guilford et al.
-
-## Support for Intel SHA Extensions
-
-Support for the Intel SHA Extensions has been added by Kristofer Peterson (@svenski123), originally developed for spacemeshos [here](https://github.com/spacemeshos/POET/issues/23). On CPUs that support it (known thus far Intel Celeron J3455 and AMD Ryzen) it gives a significant boost in performance (with thanks to @AudriusButkevicius for reporting the results; full results [here](https://github.com/minio/sha256-simd/pull/37#issuecomment-451607827)).
-
-```
-$ benchcmp avx2.txt sha-ext.txt
-benchmark           AVX2 MB/s    SHA Ext MB/s  speedup
-BenchmarkHash5M     514.40       1975.17       3.84x
-```
-
-Thanks to Kristofer Peterson, we also added additional performance changes such as optimized padding,
-endian conversions which sped up all implementations i.e. Intel SHA alone while doubled performance for small sizes,
-the other changes increased everything roughly 50%.
-
-## Support for AVX512
-
-We have added support for AVX512 which results in an up to 8x performance improvement over AVX2 (3.0 GHz Xeon Platinum 8124M CPU):
-
-```
-$ benchcmp avx2.txt avx512.txt
-benchmark           AVX2 MB/s    AVX512 MB/s  speedup
-BenchmarkHash5M     448.62       3498.20      7.80x
-```
-
-The original code was developed by Intel as part of the [multi-buffer crypto library](https://github.com/intel/intel-ipsec-mb) for IPSec or more specifically this [AVX512](https://github.com/intel/intel-ipsec-mb/blob/master/avx512/sha256_x16_avx512.asm) implementation. The key idea behind it is to process a total of 16 checksums in parallel by “transposing” 16 (independent) messages of 64 bytes between a total of 16 ZMM registers (each 64 bytes wide).
-
-Transposing the input messages means that in order to take full advantage of the speedup you need to have a (server) workload where multiple threads are doing SHA256 calculations in parallel. Unfortunately for this algorithm it is not possible for two message blocks processed in parallel to be dependent on one another — because then the (interim) result of the first part of the message has to be an input into the processing of the second part of the message.
-
-Whereas the original Intel C implementation requires some sort of explicit scheduling of messages to be processed in parallel, for Golang it makes sense to take advantage of channels in order to group messages together and use channels as well for sending back the results (thereby effectively decoupling the calculations). We have implemented a fairly simple scheduling mechanism that seems to work well in practice.
-
-Due to this different way of scheduling, we decided to use an explicit method to instantiate the AVX512 version. Essentially one or more AVX512 processing servers ([`Avx512Server`](https://github.com/minio/sha256-simd/blob/master/sha256blockAvx512_amd64.go#L294)) have to be created whereby each server can hash over 3 GB/s on a single core. An `hash.Hash` object ([`Avx512Digest`](https://github.com/minio/sha256-simd/blob/master/sha256blockAvx512_amd64.go#L45)) is then instantiated using one of these servers and used in the regular fashion:
-
-```go
-import "github.com/minio/sha256-simd"
-
-func main() {
-	server := sha256.NewAvx512Server()
-	h512 := sha256.NewAvx512(server)
-	h512.Write(fileBlock)
-	digest := h512.Sum([]byte{})
-}
-```
-
-Note that, because of the scheduling overhead, for small messages (< 1 MB) you will be better off using the regular SHA256 hashing (but those are typically not performance critical anyway). Some other tips to get the best performance:
-* Have many go routines doing SHA256 calculations in parallel.
-* Try to Write() messages in multiples of 64 bytes.
-* Try to keep the overall length of messages to a roughly similar size ie. 5 MB (this way all 16 ‘lanes’ in the AVX512 computations are contributing as much as possible).
-
-More detailed information can be found in this [blog](https://blog.minio.io/accelerate-sha256-up-to-8x-over-3-gb-s-per-core-with-avx512-a0b1d64f78f) post including scaling across cores.
-
-## Drop-In Replacement
-
-The following code snippet shows how you can use `github.com/minio/sha256-simd`. 
-This will automatically select the fastest method for the architecture on which it will be executed.
-
-```go
-import "github.com/minio/sha256-simd"
-
-func main() {
-        ...
-	shaWriter := sha256.New()
-	io.Copy(shaWriter, file)
-        ...
-}
-```
-
-## Performance
-
-Below is the speed in MB/s for a single core (ranked fast to slow) for blocks larger than 1 MB.
-
-| Processor                         | SIMD    | Speed (MB/s) |
-| --------------------------------- | ------- | ------------:|
-| 3.0 GHz Intel Xeon Platinum 8124M | AVX512  |         3498 |
-| 3.7 GHz AMD Ryzen 7 2700X         | SHA Ext |         1979 |
-| 1.2 GHz ARM Cortex-A53            | ARM64   |          638 |
-
-## asm2plan9s
-
-In order to be able to work more easily with AVX512/AVX2 instructions, a separate tool was developed to convert SIMD instructions into the corresponding BYTE sequence as accepted by Go assembly. See [asm2plan9s](https://github.com/minio/asm2plan9s) for more information.
-
-## Why and benefits
-
-One of the most performance sensitive parts of the [Minio](https://github.com/minio/minio) object storage server is related to SHA256 hash sums calculations. For instance during multi part uploads each part that is uploaded needs to be verified for data integrity by the server.
-
-Other applications that can benefit from enhanced SHA256 performance are deduplication in storage systems, intrusion detection, version control systems, integrity checking, etc.
-
-## ARM SHA Extensions
-
-The 64-bit ARMv8 core has introduced new instructions for SHA1 and SHA2 acceleration as part of the [Cryptography Extensions](http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0501f/CHDFJBCJ.html). Below you can see a small excerpt highlighting one of the rounds as is done for the SHA256 calculation process (for full code see [sha256block_arm64.s](https://github.com/minio/sha256-simd/blob/master/sha256block_arm64.s)).
-
- ```
- sha256h    q2, q3, v9.4s
- sha256h2   q3, q4, v9.4s
- sha256su0  v5.4s, v6.4s
- rev32      v8.16b, v8.16b
- add        v9.4s, v7.4s, v18.4s
- mov        v4.16b, v2.16b
- sha256h    q2, q3, v10.4s
- sha256h2   q3, q4, v10.4s
- sha256su0  v6.4s, v7.4s
- sha256su1  v5.4s, v7.4s, v8.4s
- ```
-
-### Detailed benchmarks
-
-Benchmarks generated on a 1.2 Ghz Quad-Core ARM Cortex A53 equipped [Pine64](https://www.pine64.com/).
-
-```
-minio@minio-arm:$ benchcmp golang.txt arm64.txt
-benchmark                 golang         arm64        speedup
-BenchmarkHash8Bytes-4     0.68 MB/s      5.70 MB/s      8.38x
-BenchmarkHash1K-4         5.65 MB/s    326.30 MB/s     57.75x
-BenchmarkHash8K-4         6.00 MB/s    570.63 MB/s     95.11x
-BenchmarkHash1M-4         6.05 MB/s    638.23 MB/s    105.49x
-```
-
-## License
-
-Released under the Apache License v2.0. You can find the complete text in the file LICENSE.
-
-## Contributing
-
-Contributions are welcome, please send PRs for any enhancements.
diff --git a/vendor/github.com/minio/sha256-simd/cpuid_other.go b/vendor/github.com/minio/sha256-simd/cpuid_other.go
deleted file mode 100644
index 97af6a19..00000000
--- a/vendor/github.com/minio/sha256-simd/cpuid_other.go
+++ /dev/null
@@ -1,50 +0,0 @@
-// Minio Cloud Storage, (C) 2021 Minio, Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-
-package sha256
-
-import (
-	"bytes"
-	"io/ioutil"
-	"runtime"
-
-	"github.com/klauspost/cpuid/v2"
-)
-
-var (
-	hasIntelSha = runtime.GOARCH == "amd64" && cpuid.CPU.Supports(cpuid.SHA, cpuid.SSSE3, cpuid.SSE4)
-	hasAvx512   = cpuid.CPU.Supports(cpuid.AVX512F, cpuid.AVX512DQ, cpuid.AVX512BW, cpuid.AVX512VL)
-)
-
-func hasArmSha2() bool {
-	if cpuid.CPU.Has(cpuid.SHA2) {
-		return true
-	}
-	if runtime.GOARCH != "arm64" || runtime.GOOS != "linux" {
-		return false
-	}
-
-	// Fall back to hacky cpuinfo parsing...
-	const procCPUInfo = "/proc/cpuinfo"
-
-	// Feature to check for.
-	const sha256Feature = "sha2"
-
-	cpuInfo, err := ioutil.ReadFile(procCPUInfo)
-	if err != nil {
-		return false
-	}
-	return bytes.Contains(cpuInfo, []byte(sha256Feature))
-}
diff --git a/vendor/github.com/minio/sha256-simd/sha256.go b/vendor/github.com/minio/sha256-simd/sha256.go
deleted file mode 100644
index f146bbdb..00000000
--- a/vendor/github.com/minio/sha256-simd/sha256.go
+++ /dev/null
@@ -1,468 +0,0 @@
-/*
- * Minio Cloud Storage, (C) 2016 Minio, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package sha256
-
-import (
-	"crypto/sha256"
-	"encoding/binary"
-	"errors"
-	"hash"
-)
-
-// Size - The size of a SHA256 checksum in bytes.
-const Size = 32
-
-// BlockSize - The blocksize of SHA256 in bytes.
-const BlockSize = 64
-
-const (
-	chunk = BlockSize
-	init0 = 0x6A09E667
-	init1 = 0xBB67AE85
-	init2 = 0x3C6EF372
-	init3 = 0xA54FF53A
-	init4 = 0x510E527F
-	init5 = 0x9B05688C
-	init6 = 0x1F83D9AB
-	init7 = 0x5BE0CD19
-)
-
-// digest represents the partial evaluation of a checksum.
-type digest struct {
-	h   [8]uint32
-	x   [chunk]byte
-	nx  int
-	len uint64
-}
-
-// Reset digest back to default
-func (d *digest) Reset() {
-	d.h[0] = init0
-	d.h[1] = init1
-	d.h[2] = init2
-	d.h[3] = init3
-	d.h[4] = init4
-	d.h[5] = init5
-	d.h[6] = init6
-	d.h[7] = init7
-	d.nx = 0
-	d.len = 0
-}
-
-type blockfuncType int
-
-const (
-	blockfuncStdlib blockfuncType = iota
-	blockfuncIntelSha
-	blockfuncArmSha2
-	blockfuncForceGeneric = -1
-)
-
-var blockfunc blockfuncType
-
-func init() {
-	switch {
-	case hasIntelSha:
-		blockfunc = blockfuncIntelSha
-	case hasArmSha2():
-		blockfunc = blockfuncArmSha2
-	}
-}
-
-// New returns a new hash.Hash computing the SHA256 checksum.
-func New() hash.Hash {
-	if blockfunc == blockfuncStdlib {
-		// Fallback to the standard golang implementation
-		// if no features were found.
-		return sha256.New()
-	}
-
-	d := new(digest)
-	d.Reset()
-	return d
-}
-
-// Sum256 - single caller sha256 helper
-func Sum256(data []byte) (result [Size]byte) {
-	var d digest
-	d.Reset()
-	d.Write(data)
-	result = d.checkSum()
-	return
-}
-
-// Return size of checksum
-func (d *digest) Size() int { return Size }
-
-// Return blocksize of checksum
-func (d *digest) BlockSize() int { return BlockSize }
-
-// Write to digest
-func (d *digest) Write(p []byte) (nn int, err error) {
-	nn = len(p)
-	d.len += uint64(nn)
-	if d.nx > 0 {
-		n := copy(d.x[d.nx:], p)
-		d.nx += n
-		if d.nx == chunk {
-			block(d, d.x[:])
-			d.nx = 0
-		}
-		p = p[n:]
-	}
-	if len(p) >= chunk {
-		n := len(p) &^ (chunk - 1)
-		block(d, p[:n])
-		p = p[n:]
-	}
-	if len(p) > 0 {
-		d.nx = copy(d.x[:], p)
-	}
-	return
-}
-
-// Return sha256 sum in bytes
-func (d *digest) Sum(in []byte) []byte {
-	// Make a copy of d0 so that caller can keep writing and summing.
-	d0 := *d
-	hash := d0.checkSum()
-	return append(in, hash[:]...)
-}
-
-// Intermediate checksum function
-func (d *digest) checkSum() (digest [Size]byte) {
-	n := d.nx
-
-	var k [64]byte
-	copy(k[:], d.x[:n])
-
-	k[n] = 0x80
-
-	if n >= 56 {
-		block(d, k[:])
-
-		// clear block buffer - go compiles this to optimal 1x xorps + 4x movups
-		// unfortunately expressing this more succinctly results in much worse code
-		k[0] = 0
-		k[1] = 0
-		k[2] = 0
-		k[3] = 0
-		k[4] = 0
-		k[5] = 0
-		k[6] = 0
-		k[7] = 0
-		k[8] = 0
-		k[9] = 0
-		k[10] = 0
-		k[11] = 0
-		k[12] = 0
-		k[13] = 0
-		k[14] = 0
-		k[15] = 0
-		k[16] = 0
-		k[17] = 0
-		k[18] = 0
-		k[19] = 0
-		k[20] = 0
-		k[21] = 0
-		k[22] = 0
-		k[23] = 0
-		k[24] = 0
-		k[25] = 0
-		k[26] = 0
-		k[27] = 0
-		k[28] = 0
-		k[29] = 0
-		k[30] = 0
-		k[31] = 0
-		k[32] = 0
-		k[33] = 0
-		k[34] = 0
-		k[35] = 0
-		k[36] = 0
-		k[37] = 0
-		k[38] = 0
-		k[39] = 0
-		k[40] = 0
-		k[41] = 0
-		k[42] = 0
-		k[43] = 0
-		k[44] = 0
-		k[45] = 0
-		k[46] = 0
-		k[47] = 0
-		k[48] = 0
-		k[49] = 0
-		k[50] = 0
-		k[51] = 0
-		k[52] = 0
-		k[53] = 0
-		k[54] = 0
-		k[55] = 0
-		k[56] = 0
-		k[57] = 0
-		k[58] = 0
-		k[59] = 0
-		k[60] = 0
-		k[61] = 0
-		k[62] = 0
-		k[63] = 0
-	}
-	binary.BigEndian.PutUint64(k[56:64], uint64(d.len)<<3)
-	block(d, k[:])
-
-	{
-		const i = 0
-		binary.BigEndian.PutUint32(digest[i*4:i*4+4], d.h[i])
-	}
-	{
-		const i = 1
-		binary.BigEndian.PutUint32(digest[i*4:i*4+4], d.h[i])
-	}
-	{
-		const i = 2
-		binary.BigEndian.PutUint32(digest[i*4:i*4+4], d.h[i])
-	}
-	{
-		const i = 3
-		binary.BigEndian.PutUint32(digest[i*4:i*4+4], d.h[i])
-	}
-	{
-		const i = 4
-		binary.BigEndian.PutUint32(digest[i*4:i*4+4], d.h[i])
-	}
-	{
-		const i = 5
-		binary.BigEndian.PutUint32(digest[i*4:i*4+4], d.h[i])
-	}
-	{
-		const i = 6
-		binary.BigEndian.PutUint32(digest[i*4:i*4+4], d.h[i])
-	}
-	{
-		const i = 7
-		binary.BigEndian.PutUint32(digest[i*4:i*4+4], d.h[i])
-	}
-
-	return
-}
-
-func block(dig *digest, p []byte) {
-	if blockfunc == blockfuncIntelSha {
-		blockIntelShaGo(dig, p)
-	} else if blockfunc == blockfuncArmSha2 {
-		blockArmSha2Go(dig, p)
-	} else {
-		blockGeneric(dig, p)
-	}
-}
-
-func blockGeneric(dig *digest, p []byte) {
-	var w [64]uint32
-	h0, h1, h2, h3, h4, h5, h6, h7 := dig.h[0], dig.h[1], dig.h[2], dig.h[3], dig.h[4], dig.h[5], dig.h[6], dig.h[7]
-	for len(p) >= chunk {
-		// Can interlace the computation of w with the
-		// rounds below if needed for speed.
-		for i := 0; i < 16; i++ {
-			j := i * 4
-			w[i] = uint32(p[j])<<24 | uint32(p[j+1])<<16 | uint32(p[j+2])<<8 | uint32(p[j+3])
-		}
-		for i := 16; i < 64; i++ {
-			v1 := w[i-2]
-			t1 := (v1>>17 | v1<<(32-17)) ^ (v1>>19 | v1<<(32-19)) ^ (v1 >> 10)
-			v2 := w[i-15]
-			t2 := (v2>>7 | v2<<(32-7)) ^ (v2>>18 | v2<<(32-18)) ^ (v2 >> 3)
-			w[i] = t1 + w[i-7] + t2 + w[i-16]
-		}
-
-		a, b, c, d, e, f, g, h := h0, h1, h2, h3, h4, h5, h6, h7
-
-		for i := 0; i < 64; i++ {
-			t1 := h + ((e>>6 | e<<(32-6)) ^ (e>>11 | e<<(32-11)) ^ (e>>25 | e<<(32-25))) + ((e & f) ^ (^e & g)) + _K[i] + w[i]
-
-			t2 := ((a>>2 | a<<(32-2)) ^ (a>>13 | a<<(32-13)) ^ (a>>22 | a<<(32-22))) + ((a & b) ^ (a & c) ^ (b & c))
-
-			h = g
-			g = f
-			f = e
-			e = d + t1
-			d = c
-			c = b
-			b = a
-			a = t1 + t2
-		}
-
-		h0 += a
-		h1 += b
-		h2 += c
-		h3 += d
-		h4 += e
-		h5 += f
-		h6 += g
-		h7 += h
-
-		p = p[chunk:]
-	}
-
-	dig.h[0], dig.h[1], dig.h[2], dig.h[3], dig.h[4], dig.h[5], dig.h[6], dig.h[7] = h0, h1, h2, h3, h4, h5, h6, h7
-}
-
-var _K = []uint32{
-	0x428a2f98,
-	0x71374491,
-	0xb5c0fbcf,
-	0xe9b5dba5,
-	0x3956c25b,
-	0x59f111f1,
-	0x923f82a4,
-	0xab1c5ed5,
-	0xd807aa98,
-	0x12835b01,
-	0x243185be,
-	0x550c7dc3,
-	0x72be5d74,
-	0x80deb1fe,
-	0x9bdc06a7,
-	0xc19bf174,
-	0xe49b69c1,
-	0xefbe4786,
-	0x0fc19dc6,
-	0x240ca1cc,
-	0x2de92c6f,
-	0x4a7484aa,
-	0x5cb0a9dc,
-	0x76f988da,
-	0x983e5152,
-	0xa831c66d,
-	0xb00327c8,
-	0xbf597fc7,
-	0xc6e00bf3,
-	0xd5a79147,
-	0x06ca6351,
-	0x14292967,
-	0x27b70a85,
-	0x2e1b2138,
-	0x4d2c6dfc,
-	0x53380d13,
-	0x650a7354,
-	0x766a0abb,
-	0x81c2c92e,
-	0x92722c85,
-	0xa2bfe8a1,
-	0xa81a664b,
-	0xc24b8b70,
-	0xc76c51a3,
-	0xd192e819,
-	0xd6990624,
-	0xf40e3585,
-	0x106aa070,
-	0x19a4c116,
-	0x1e376c08,
-	0x2748774c,
-	0x34b0bcb5,
-	0x391c0cb3,
-	0x4ed8aa4a,
-	0x5b9cca4f,
-	0x682e6ff3,
-	0x748f82ee,
-	0x78a5636f,
-	0x84c87814,
-	0x8cc70208,
-	0x90befffa,
-	0xa4506ceb,
-	0xbef9a3f7,
-	0xc67178f2,
-}
-
-const (
-	magic256      = "sha\x03"
-	marshaledSize = len(magic256) + 8*4 + chunk + 8
-)
-
-func (d *digest) MarshalBinary() ([]byte, error) {
-	b := make([]byte, 0, marshaledSize)
-	b = append(b, magic256...)
-	b = appendUint32(b, d.h[0])
-	b = appendUint32(b, d.h[1])
-	b = appendUint32(b, d.h[2])
-	b = appendUint32(b, d.h[3])
-	b = appendUint32(b, d.h[4])
-	b = appendUint32(b, d.h[5])
-	b = appendUint32(b, d.h[6])
-	b = appendUint32(b, d.h[7])
-	b = append(b, d.x[:d.nx]...)
-	b = b[:len(b)+len(d.x)-d.nx] // already zero
-	b = appendUint64(b, d.len)
-	return b, nil
-}
-
-func (d *digest) UnmarshalBinary(b []byte) error {
-	if len(b) < len(magic256) || string(b[:len(magic256)]) != magic256 {
-		return errors.New("crypto/sha256: invalid hash state identifier")
-	}
-	if len(b) != marshaledSize {
-		return errors.New("crypto/sha256: invalid hash state size")
-	}
-	b = b[len(magic256):]
-	b, d.h[0] = consumeUint32(b)
-	b, d.h[1] = consumeUint32(b)
-	b, d.h[2] = consumeUint32(b)
-	b, d.h[3] = consumeUint32(b)
-	b, d.h[4] = consumeUint32(b)
-	b, d.h[5] = consumeUint32(b)
-	b, d.h[6] = consumeUint32(b)
-	b, d.h[7] = consumeUint32(b)
-	b = b[copy(d.x[:], b):]
-	b, d.len = consumeUint64(b)
-	d.nx = int(d.len % chunk)
-	return nil
-}
-
-func appendUint32(b []byte, v uint32) []byte {
-	return append(b,
-		byte(v>>24),
-		byte(v>>16),
-		byte(v>>8),
-		byte(v),
-	)
-}
-
-func appendUint64(b []byte, v uint64) []byte {
-	return append(b,
-		byte(v>>56),
-		byte(v>>48),
-		byte(v>>40),
-		byte(v>>32),
-		byte(v>>24),
-		byte(v>>16),
-		byte(v>>8),
-		byte(v),
-	)
-}
-
-func consumeUint64(b []byte) ([]byte, uint64) {
-	_ = b[7]
-	x := uint64(b[7]) | uint64(b[6])<<8 | uint64(b[5])<<16 | uint64(b[4])<<24 |
-		uint64(b[3])<<32 | uint64(b[2])<<40 | uint64(b[1])<<48 | uint64(b[0])<<56
-	return b[8:], x
-}
-
-func consumeUint32(b []byte) ([]byte, uint32) {
-	_ = b[3]
-	x := uint32(b[3]) | uint32(b[2])<<8 | uint32(b[1])<<16 | uint32(b[0])<<24
-	return b[4:], x
-}
diff --git a/vendor/github.com/minio/sha256-simd/sha256blockAvx512_amd64.asm b/vendor/github.com/minio/sha256-simd/sha256blockAvx512_amd64.asm
deleted file mode 100644
index c959b1aa..00000000
--- a/vendor/github.com/minio/sha256-simd/sha256blockAvx512_amd64.asm
+++ /dev/null
@@ -1,686 +0,0 @@
-
-// 16x Parallel implementation of SHA256 for AVX512
-
-//
-// Minio Cloud Storage, (C) 2017 Minio, Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-//
-// This code is based on the Intel Multi-Buffer Crypto for IPSec library
-// and more specifically the following implementation:
-// https://github.com/intel/intel-ipsec-mb/blob/master/avx512/sha256_x16_avx512.asm
-//
-// For Golang it has been converted into Plan 9 assembly with the help of
-// github.com/minio/asm2plan9s to assemble the AVX512 instructions
-//
-
-// Copyright (c) 2017, Intel Corporation
-//
-// Redistribution and use in source and binary forms, with or without
-// modification, are permitted provided that the following conditions are met:
-//
-//     * Redistributions of source code must retain the above copyright notice,
-//       this list of conditions and the following disclaimer.
-//     * Redistributions in binary form must reproduce the above copyright
-//       notice, this list of conditions and the following disclaimer in the
-//       documentation and/or other materials provided with the distribution.
-//     * Neither the name of Intel Corporation nor the names of its contributors
-//       may be used to endorse or promote products derived from this software
-//       without specific prior written permission.
-//
-// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
-// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
-// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
-// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
-// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
-// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
-// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
-// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-#define SHA256_DIGEST_ROW_SIZE 64
-
-// arg1
-#define STATE rdi
-#define STATE_P9 DI
-// arg2
-#define INP_SIZE rsi
-#define INP_SIZE_P9 SI
-
-#define IDX rcx
-#define TBL rdx
-#define TBL_P9 DX
-
-#define INPUT rax
-#define INPUT_P9 AX
-
-#define inp0	r9
-#define SCRATCH_P9 R12
-#define SCRATCH  r12
-#define maskp    r13
-#define MASKP_P9 R13
-#define mask     r14
-#define MASK_P9  R14
-
-#define A       zmm0
-#define B       zmm1
-#define C       zmm2
-#define D       zmm3
-#define E       zmm4
-#define F       zmm5
-#define G       zmm6
-#define H       zmm7
-#define T1      zmm8
-#define TMP0    zmm9
-#define TMP1    zmm10
-#define TMP2    zmm11
-#define TMP3    zmm12
-#define TMP4    zmm13
-#define TMP5    zmm14
-#define TMP6    zmm15
-
-#define W0      zmm16
-#define W1      zmm17
-#define W2      zmm18
-#define W3      zmm19
-#define W4      zmm20
-#define W5      zmm21
-#define W6      zmm22
-#define W7      zmm23
-#define W8      zmm24
-#define W9      zmm25
-#define W10     zmm26
-#define W11     zmm27
-#define W12     zmm28
-#define W13     zmm29
-#define W14     zmm30
-#define W15     zmm31
-
-
-#define TRANSPOSE16(_r0, _r1, _r2, _r3, _r4, _r5, _r6, _r7, _r8, _r9, _r10, _r11, _r12, _r13, _r14, _r15, _t0, _t1) \
-    \
-    \ // input   r0  = {a15 a14 a13 a12   a11 a10  a9  a8    a7  a6  a5  a4    a3  a2  a1  a0}
-    \ //         r1  = {b15 b14 b13 b12   b11 b10  b9  b8    b7  b6  b5  b4    b3  b2  b1  b0}
-    \ //         r2  = {c15 c14 c13 c12   c11 c10  c9  c8    c7  c6  c5  c4    c3  c2  c1  c0}
-    \ //         r3  = {d15 d14 d13 d12   d11 d10  d9  d8    d7  d6  d5  d4    d3  d2  d1  d0}
-    \ //         r4  = {e15 e14 e13 e12   e11 e10  e9  e8    e7  e6  e5  e4    e3  e2  e1  e0}
-    \ //         r5  = {f15 f14 f13 f12   f11 f10  f9  f8    f7  f6  f5  f4    f3  f2  f1  f0}
-    \ //         r6  = {g15 g14 g13 g12   g11 g10  g9  g8    g7  g6  g5  g4    g3  g2  g1  g0}
-    \ //         r7  = {h15 h14 h13 h12   h11 h10  h9  h8    h7  h6  h5  h4    h3  h2  h1  h0}
-    \ //         r8  = {i15 i14 i13 i12   i11 i10  i9  i8    i7  i6  i5  i4    i3  i2  i1  i0}
-    \ //         r9  = {j15 j14 j13 j12   j11 j10  j9  j8    j7  j6  j5  j4    j3  j2  j1  j0}
-    \ //         r10 = {k15 k14 k13 k12   k11 k10  k9  k8    k7  k6  k5  k4    k3  k2  k1  k0}
-    \ //         r11 = {l15 l14 l13 l12   l11 l10  l9  l8    l7  l6  l5  l4    l3  l2  l1  l0}
-    \ //         r12 = {m15 m14 m13 m12   m11 m10  m9  m8    m7  m6  m5  m4    m3  m2  m1  m0}
-    \ //         r13 = {n15 n14 n13 n12   n11 n10  n9  n8    n7  n6  n5  n4    n3  n2  n1  n0}
-    \ //         r14 = {o15 o14 o13 o12   o11 o10  o9  o8    o7  o6  o5  o4    o3  o2  o1  o0}
-    \ //         r15 = {p15 p14 p13 p12   p11 p10  p9  p8    p7  p6  p5  p4    p3  p2  p1  p0}
-    \
-    \ // output  r0  = { p0  o0  n0  m0    l0  k0  j0  i0    h0  g0  f0  e0    d0  c0  b0  a0}
-    \ //         r1  = { p1  o1  n1  m1    l1  k1  j1  i1    h1  g1  f1  e1    d1  c1  b1  a1}
-    \ //         r2  = { p2  o2  n2  m2    l2  k2  j2  i2    h2  g2  f2  e2    d2  c2  b2  a2}
-    \ //         r3  = { p3  o3  n3  m3    l3  k3  j3  i3    h3  g3  f3  e3    d3  c3  b3  a3}
-    \ //         r4  = { p4  o4  n4  m4    l4  k4  j4  i4    h4  g4  f4  e4    d4  c4  b4  a4}
-    \ //         r5  = { p5  o5  n5  m5    l5  k5  j5  i5    h5  g5  f5  e5    d5  c5  b5  a5}
-    \ //         r6  = { p6  o6  n6  m6    l6  k6  j6  i6    h6  g6  f6  e6    d6  c6  b6  a6}
-    \ //         r7  = { p7  o7  n7  m7    l7  k7  j7  i7    h7  g7  f7  e7    d7  c7  b7  a7}
-    \ //         r8  = { p8  o8  n8  m8    l8  k8  j8  i8    h8  g8  f8  e8    d8  c8  b8  a8}
-    \ //         r9  = { p9  o9  n9  m9    l9  k9  j9  i9    h9  g9  f9  e9    d9  c9  b9  a9}
-    \ //         r10 = {p10 o10 n10 m10   l10 k10 j10 i10   h10 g10 f10 e10   d10 c10 b10 a10}
-    \ //         r11 = {p11 o11 n11 m11   l11 k11 j11 i11   h11 g11 f11 e11   d11 c11 b11 a11}
-    \ //         r12 = {p12 o12 n12 m12   l12 k12 j12 i12   h12 g12 f12 e12   d12 c12 b12 a12}
-    \ //         r13 = {p13 o13 n13 m13   l13 k13 j13 i13   h13 g13 f13 e13   d13 c13 b13 a13}
-    \ //         r14 = {p14 o14 n14 m14   l14 k14 j14 i14   h14 g14 f14 e14   d14 c14 b14 a14}
-    \ //         r15 = {p15 o15 n15 m15   l15 k15 j15 i15   h15 g15 f15 e15   d15 c15 b15 a15}
-    \
-    \ // process top half
-    vshufps _t0, _r0, _r1, 0x44      \ // t0 = {b13 b12 a13 a12   b9  b8  a9  a8   b5 b4 a5 a4   b1 b0 a1 a0}
-    vshufps _r0, _r0, _r1, 0xEE      \ // r0 = {b15 b14 a15 a14   b11 b10 a11 a10  b7 b6 a7 a6   b3 b2 a3 a2}
-    vshufps _t1, _r2, _r3, 0x44      \ // t1 = {d13 d12 c13 c12   d9  d8  c9  c8   d5 d4 c5 c4   d1 d0 c1 c0}
-    vshufps _r2, _r2, _r3, 0xEE      \ // r2 = {d15 d14 c15 c14   d11 d10 c11 c10  d7 d6 c7 c6   d3 d2 c3 c2}
-                                     \
-    vshufps	_r3, _t0, _t1, 0xDD      \ // r3 = {d13 c13 b13 a13   d9  c9  b9  a9   d5 c5 b5 a5   d1 c1 b1 a1}
-    vshufps	_r1, _r0, _r2, 0x88      \ // r1 = {d14 c14 b14 a14   d10 c10 b10 a10  d6 c6 b6 a6   d2 c2 b2 a2}
-    vshufps	_r0, _r0, _r2, 0xDD      \ // r0 = {d15 c15 b15 a15   d11 c11 b11 a11  d7 c7 b7 a7   d3 c3 b3 a3}
-    vshufps	_t0, _t0, _t1, 0x88      \ // t0 = {d12 c12 b12 a12   d8  c8  b8  a8   d4 c4 b4 a4   d0 c0 b0 a0}
-                                     \
-    \ // use r2 in place of t0
-    vshufps _r2, _r4, _r5, 0x44      \ // r2 = {f13 f12 e13 e12   f9  f8  e9  e8   f5 f4 e5 e4   f1 f0 e1 e0}
-    vshufps _r4, _r4, _r5, 0xEE      \ // r4 = {f15 f14 e15 e14   f11 f10 e11 e10  f7 f6 e7 e6   f3 f2 e3 e2}
-    vshufps _t1, _r6, _r7, 0x44      \ // t1 = {h13 h12 g13 g12   h9  h8  g9  g8   h5 h4 g5 g4   h1 h0 g1 g0}
-    vshufps _r6, _r6, _r7, 0xEE      \ // r6 = {h15 h14 g15 g14   h11 h10 g11 g10  h7 h6 g7 g6   h3 h2 g3 g2}
-                                     \
-    vshufps _r7, _r2, _t1, 0xDD      \ // r7 = {h13 g13 f13 e13   h9  g9  f9  e9   h5 g5 f5 e5   h1 g1 f1 e1}
-    vshufps _r5, _r4, _r6, 0x88      \ // r5 = {h14 g14 f14 e14   h10 g10 f10 e10  h6 g6 f6 e6   h2 g2 f2 e2}
-    vshufps _r4, _r4, _r6, 0xDD      \ // r4 = {h15 g15 f15 e15   h11 g11 f11 e11  h7 g7 f7 e7   h3 g3 f3 e3}
-    vshufps _r2, _r2, _t1, 0x88      \ // r2 = {h12 g12 f12 e12   h8  g8  f8  e8   h4 g4 f4 e4   h0 g0 f0 e0}
-                                     \
-    \ // use r6 in place of t0
-    vshufps _r6, _r8, _r9,    0x44   \ // r6  = {j13 j12 i13 i12   j9  j8  i9  i8   j5 j4 i5 i4   j1 j0 i1 i0}
-    vshufps _r8, _r8, _r9,    0xEE   \ // r8  = {j15 j14 i15 i14   j11 j10 i11 i10  j7 j6 i7 i6   j3 j2 i3 i2}
-    vshufps _t1, _r10, _r11,  0x44   \ // t1  = {l13 l12 k13 k12   l9  l8  k9  k8   l5 l4 k5 k4   l1 l0 k1 k0}
-    vshufps _r10, _r10, _r11, 0xEE   \ // r10 = {l15 l14 k15 k14   l11 l10 k11 k10  l7 l6 k7 k6   l3 l2 k3 k2}
-                                     \
-    vshufps _r11, _r6, _t1, 0xDD     \ // r11 = {l13 k13 j13 113   l9  k9  j9  i9   l5 k5 j5 i5   l1 k1 j1 i1}
-    vshufps _r9, _r8, _r10, 0x88     \ // r9  = {l14 k14 j14 114   l10 k10 j10 i10  l6 k6 j6 i6   l2 k2 j2 i2}
-    vshufps _r8, _r8, _r10, 0xDD     \ // r8  = {l15 k15 j15 115   l11 k11 j11 i11  l7 k7 j7 i7   l3 k3 j3 i3}
-    vshufps _r6, _r6, _t1,  0x88     \ // r6  = {l12 k12 j12 112   l8  k8  j8  i8   l4 k4 j4 i4   l0 k0 j0 i0}
-                                     \
-    \ // use r10 in place of t0
-    vshufps _r10, _r12, _r13, 0x44   \ // r10 = {n13 n12 m13 m12   n9  n8  m9  m8   n5 n4 m5 m4   n1 n0 a1 m0}
-    vshufps _r12, _r12, _r13, 0xEE   \ // r12 = {n15 n14 m15 m14   n11 n10 m11 m10  n7 n6 m7 m6   n3 n2 a3 m2}
-    vshufps _t1, _r14, _r15,  0x44   \ // t1  = {p13 p12 013 012   p9  p8  09  08   p5 p4 05 04   p1 p0 01 00}
-    vshufps _r14, _r14, _r15, 0xEE   \ // r14 = {p15 p14 015 014   p11 p10 011 010  p7 p6 07 06   p3 p2 03 02}
-                                     \
-    vshufps _r15, _r10, _t1,  0xDD   \ // r15 = {p13 013 n13 m13   p9  09  n9  m9   p5 05 n5 m5   p1 01 n1 m1}
-    vshufps _r13, _r12, _r14, 0x88   \ // r13 = {p14 014 n14 m14   p10 010 n10 m10  p6 06 n6 m6   p2 02 n2 m2}
-    vshufps _r12, _r12, _r14, 0xDD   \ // r12 = {p15 015 n15 m15   p11 011 n11 m11  p7 07 n7 m7   p3 03 n3 m3}
-    vshufps _r10, _r10, _t1,  0x88   \ // r10 = {p12 012 n12 m12   p8  08  n8  m8   p4 04 n4 m4   p0 00 n0 m0}
-                                     \
-    \ // At this point, the registers that contain interesting data are:
-    \ // t0, r3, r1, r0, r2, r7, r5, r4, r6, r11, r9, r8, r10, r15, r13, r12
-    \ // Can use t1 and r14 as scratch registers
-    LEAQ PSHUFFLE_TRANSPOSE16_MASK1<>(SB), BX \
-    LEAQ PSHUFFLE_TRANSPOSE16_MASK2<>(SB), R8 \
-                                     \
-    vmovdqu32 _r14, [rbx]            \
-    vpermi2q  _r14, _t0, _r2         \ // r14 = {h8  g8  f8  e8   d8  c8  b8  a8   h0 g0 f0 e0	 d0 c0 b0 a0}
-    vmovdqu32 _t1,  [r8]             \
-    vpermi2q  _t1,  _t0, _r2         \ // t1  = {h12 g12 f12 e12  d12 c12 b12 a12  h4 g4 f4 e4	 d4 c4 b4 a4}
-                                     \
-    vmovdqu32 _r2, [rbx]             \
-    vpermi2q  _r2, _r3, _r7          \ // r2  = {h9  g9  f9  e9   d9  c9  b9  a9   h1 g1 f1 e1	 d1 c1 b1 a1}
-    vmovdqu32 _t0, [r8]              \
-    vpermi2q  _t0, _r3, _r7          \ // t0  = {h13 g13 f13 e13  d13 c13 b13 a13  h5 g5 f5 e5	 d5 c5 b5 a5}
-                                     \
-    vmovdqu32 _r3, [rbx]             \
-    vpermi2q  _r3, _r1, _r5          \ // r3  = {h10 g10 f10 e10  d10 c10 b10 a10  h2 g2 f2 e2	 d2 c2 b2 a2}
-    vmovdqu32 _r7, [r8]              \
-    vpermi2q  _r7, _r1, _r5          \ // r7  = {h14 g14 f14 e14  d14 c14 b14 a14  h6 g6 f6 e6	 d6 c6 b6 a6}
-                                     \
-    vmovdqu32 _r1, [rbx]             \
-    vpermi2q  _r1, _r0, _r4          \ // r1  = {h11 g11 f11 e11  d11 c11 b11 a11  h3 g3 f3 e3	 d3 c3 b3 a3}
-    vmovdqu32 _r5, [r8]              \
-    vpermi2q  _r5, _r0, _r4          \ // r5  = {h15 g15 f15 e15  d15 c15 b15 a15  h7 g7 f7 e7	 d7 c7 b7 a7}
-                                     \
-    vmovdqu32 _r0, [rbx]             \
-    vpermi2q  _r0, _r6, _r10         \ // r0  = {p8  o8  n8  m8   l8  k8  j8  i8   p0 o0 n0 m0	 l0 k0 j0 i0}
-    vmovdqu32 _r4, [r8]              \
-    vpermi2q  _r4, _r6, _r10         \ // r4  = {p12 o12 n12 m12  l12 k12 j12 i12  p4 o4 n4 m4	 l4 k4 j4 i4}
-                                     \
-    vmovdqu32 _r6, [rbx]             \
-    vpermi2q  _r6, _r11, _r15        \ // r6  = {p9  o9  n9  m9   l9  k9  j9  i9   p1 o1 n1 m1	 l1 k1 j1 i1}
-    vmovdqu32 _r10, [r8]             \
-    vpermi2q  _r10, _r11, _r15       \ // r10 = {p13 o13 n13 m13  l13 k13 j13 i13  p5 o5 n5 m5	 l5 k5 j5 i5}
-                                     \
-    vmovdqu32 _r11, [rbx]            \
-    vpermi2q  _r11, _r9, _r13        \ // r11 = {p10 o10 n10 m10  l10 k10 j10 i10  p2 o2 n2 m2	 l2 k2 j2 i2}
-    vmovdqu32 _r15, [r8]             \
-    vpermi2q  _r15, _r9, _r13        \ // r15 = {p14 o14 n14 m14  l14 k14 j14 i14  p6 o6 n6 m6	 l6 k6 j6 i6}
-                                     \
-    vmovdqu32 _r9, [rbx]             \
-    vpermi2q  _r9, _r8, _r12         \ // r9  = {p11 o11 n11 m11  l11 k11 j11 i11  p3 o3 n3 m3	 l3 k3 j3 i3}
-    vmovdqu32 _r13, [r8]             \
-    vpermi2q  _r13, _r8, _r12        \ // r13 = {p15 o15 n15 m15  l15 k15 j15 i15  p7 o7 n7 m7	 l7 k7 j7 i7}
-                                     \
-    \ // At this point r8 and r12 can be used as scratch registers
-    vshuff64x2 _r8, _r14, _r0, 0xEE  \ // r8  = {p8  o8  n8  m8   l8  k8  j8  i8   h8 g8 f8 e8   d8 c8 b8 a8}
-    vshuff64x2 _r0, _r14, _r0, 0x44  \ // r0  = {p0  o0  n0  m0   l0  k0  j0  i0   h0 g0 f0 e0   d0 c0 b0 a0}
-                                     \
-    vshuff64x2 _r12, _t1, _r4, 0xEE  \ // r12 = {p12 o12 n12 m12  l12 k12 j12 i12  h12 g12 f12 e12  d12 c12 b12 a12}
-    vshuff64x2 _r4, _t1, _r4, 0x44   \ // r4  = {p4  o4  n4  m4   l4  k4  j4  i4   h4 g4 f4 e4   d4 c4 b4 a4}
-                                     \
-    vshuff64x2 _r14, _r7, _r15, 0xEE \ // r14 = {p14 o14 n14 m14  l14 k14 j14 i14  h14 g14 f14 e14  d14 c14 b14 a14}
-    vshuff64x2 _t1, _r7, _r15, 0x44  \ // t1  = {p6  o6  n6  m6   l6  k6  j6  i6   h6 g6 f6 e6   d6 c6 b6 a6}
-                                     \
-    vshuff64x2 _r15, _r5, _r13, 0xEE \ // r15 = {p15 o15 n15 m15  l15 k15 j15 i15  h15 g15 f15 e15  d15 c15 b15 a15}
-    vshuff64x2 _r7, _r5, _r13, 0x44  \ // r7  = {p7  o7  n7  m7   l7  k7  j7  i7   h7 g7 f7 e7   d7 c7 b7 a7}
-                                     \
-    vshuff64x2 _r13, _t0, _r10, 0xEE \ // r13 = {p13 o13 n13 m13  l13 k13 j13 i13  h13 g13 f13 e13  d13 c13 b13 a13}
-    vshuff64x2 _r5, _t0, _r10, 0x44  \ // r5  = {p5  o5  n5  m5   l5  k5  j5  i5   h5 g5 f5 e5   d5 c5 b5 a5}
-                                     \
-    vshuff64x2 _r10, _r3, _r11, 0xEE \ // r10 = {p10 o10 n10 m10  l10 k10 j10 i10  h10 g10 f10 e10  d10 c10 b10 a10}
-    vshuff64x2 _t0, _r3, _r11, 0x44  \ // t0  = {p2  o2  n2  m2   l2  k2  j2  i2   h2 g2 f2 e2   d2 c2 b2 a2}
-                                     \
-    vshuff64x2 _r11, _r1, _r9, 0xEE  \ // r11 = {p11 o11 n11 m11  l11 k11 j11 i11  h11 g11 f11 e11  d11 c11 b11 a11}
-    vshuff64x2 _r3, _r1, _r9, 0x44   \ // r3  = {p3  o3  n3  m3   l3  k3  j3  i3   h3 g3 f3 e3   d3 c3 b3 a3}
-                                     \
-    vshuff64x2 _r9, _r2, _r6, 0xEE   \ // r9  = {p9  o9  n9  m9   l9  k9  j9  i9   h9 g9 f9 e9   d9 c9 b9 a9}
-    vshuff64x2 _r1, _r2, _r6, 0x44   \ // r1  = {p1  o1  n1  m1   l1  k1  j1  i1   h1 g1 f1 e1   d1 c1 b1 a1}
-                                     \
-    vmovdqu32 _r2, _t0               \ // r2  = {p2  o2  n2  m2   l2  k2  j2  i2   h2 g2 f2 e2   d2 c2 b2 a2}
-    vmovdqu32 _r6, _t1               \ // r6  = {p6  o6  n6  m6   l6  k6  j6  i6   h6 g6 f6 e6   d6 c6 b6 a6}
-
-
-//  CH(A, B, C) = (A&B) ^ (~A&C)
-// MAJ(E, F, G) = (E&F) ^ (E&G) ^ (F&G)
-// SIGMA0 = ROR_2  ^ ROR_13 ^ ROR_22
-// SIGMA1 = ROR_6  ^ ROR_11 ^ ROR_25
-// sigma0 = ROR_7  ^ ROR_18 ^ SHR_3
-// sigma1 = ROR_17 ^ ROR_19 ^ SHR_10
-
-// Main processing loop per round
-#define PROCESS_LOOP(_WT, _ROUND, _A, _B, _C, _D, _E, _F, _G, _H)  \
-    \ // T1 = H + SIGMA1(E) + CH(E, F, G) + Kt + Wt
-    \ // T2 = SIGMA0(A) + MAJ(A, B, C)
-    \ // H=G, G=F, F=E, E=D+T1, D=C, C=B, B=A, A=T1+T2
-    \
-    \ // H becomes T2, then add T1 for A
-    \ // D becomes D + T1 for E
-    \
-    vpaddd      T1, _H, TMP3           \ // T1 = H + Kt
-    vmovdqu32   TMP0, _E               \
-    vprord      TMP1, _E, 6            \ // ROR_6(E)
-    vprord      TMP2, _E, 11           \ // ROR_11(E)
-    vprord      TMP3, _E, 25           \ // ROR_25(E)
-    vpternlogd  TMP0, _F, _G, 0xCA     \ // TMP0 = CH(E,F,G)
-    vpaddd      T1, T1, _WT            \ // T1 = T1 + Wt
-    vpternlogd  TMP1, TMP2, TMP3, 0x96 \ // TMP1 = SIGMA1(E)
-    vpaddd      T1, T1, TMP0           \ // T1 = T1 + CH(E,F,G)
-    vpaddd      T1, T1, TMP1           \ // T1 = T1 + SIGMA1(E)
-    vpaddd      _D, _D, T1             \ // D = D + T1
-                                       \
-    vprord      _H, _A, 2              \ // ROR_2(A)
-    vprord      TMP2, _A, 13           \ // ROR_13(A)
-    vprord      TMP3, _A, 22           \ // ROR_22(A)
-    vmovdqu32   TMP0, _A               \
-    vpternlogd  TMP0, _B, _C, 0xE8     \ // TMP0 = MAJ(A,B,C)
-    vpternlogd  _H, TMP2, TMP3, 0x96   \ // H(T2) = SIGMA0(A)
-    vpaddd      _H, _H, TMP0           \ // H(T2) = SIGMA0(A) + MAJ(A,B,C)
-    vpaddd      _H, _H, T1             \ // H(A) = H(T2) + T1
-                                       \
-    vmovdqu32   TMP3, [TBL + ((_ROUND+1)*64)] \ // Next Kt
-
-
-#define MSG_SCHED_ROUND_16_63(_WT, _WTp1, _WTp9, _WTp14) \
-    vprord      TMP4, _WTp14, 17                         \ // ROR_17(Wt-2)
-    vprord      TMP5, _WTp14, 19                         \ // ROR_19(Wt-2)
-    vpsrld      TMP6, _WTp14, 10                         \ // SHR_10(Wt-2)
-    vpternlogd  TMP4, TMP5, TMP6, 0x96                   \ // TMP4 = sigma1(Wt-2)
-                                                         \
-    vpaddd      _WT, _WT, TMP4	                         \ // Wt = Wt-16 + sigma1(Wt-2)
-    vpaddd      _WT, _WT, _WTp9	                         \ // Wt = Wt-16 + sigma1(Wt-2) + Wt-7
-                                                         \
-    vprord      TMP4, _WTp1, 7                           \ // ROR_7(Wt-15)
-    vprord      TMP5, _WTp1, 18                          \ // ROR_18(Wt-15)
-    vpsrld      TMP6, _WTp1, 3                           \ // SHR_3(Wt-15)
-    vpternlogd  TMP4, TMP5, TMP6, 0x96                   \ // TMP4 = sigma0(Wt-15)
-                                                         \
-    vpaddd      _WT, _WT, TMP4	                         \ // Wt = Wt-16 + sigma1(Wt-2) +
-                                                         \ //      Wt-7 + sigma0(Wt-15) +
-
-
-// Note this is reading in a block of data for one lane
-// When all 16 are read, the data must be transposed to build msg schedule
-#define MSG_SCHED_ROUND_00_15(_WT, OFFSET, LABEL)             \
-    TESTQ $(1<<OFFSET), MASK_P9                               \
-    JE    LABEL                                               \
-    MOVQ  OFFSET*24(INPUT_P9), R9                             \
-    vmovups _WT, [inp0+IDX]                                   \
-LABEL:                                                        \
-
-#define MASKED_LOAD(_WT, OFFSET, LABEL) \
-    TESTQ $(1<<OFFSET), MASK_P9         \
-    JE    LABEL                         \
-    MOVQ  OFFSET*24(INPUT_P9), R9       \
-    vmovups _WT,[inp0+IDX]              \
-LABEL:                                  \
-
-TEXT ·sha256_x16_avx512(SB), 7, $0
-    MOVQ  digests+0(FP), STATE_P9       //
-    MOVQ  scratch+8(FP), SCRATCH_P9
-    MOVQ  mask_len+32(FP), INP_SIZE_P9  // number of blocks to process
-    MOVQ  mask+24(FP), MASKP_P9
-    MOVQ (MASKP_P9), MASK_P9
-    kmovq k1, mask
-    LEAQ  inputs+48(FP), INPUT_P9
-
-    // Initialize digests
-    vmovdqu32 A, [STATE + 0*SHA256_DIGEST_ROW_SIZE]
-    vmovdqu32 B, [STATE + 1*SHA256_DIGEST_ROW_SIZE]
-    vmovdqu32 C, [STATE + 2*SHA256_DIGEST_ROW_SIZE]
-    vmovdqu32 D, [STATE + 3*SHA256_DIGEST_ROW_SIZE]
-    vmovdqu32 E, [STATE + 4*SHA256_DIGEST_ROW_SIZE]
-    vmovdqu32 F, [STATE + 5*SHA256_DIGEST_ROW_SIZE]
-    vmovdqu32 G, [STATE + 6*SHA256_DIGEST_ROW_SIZE]
-    vmovdqu32 H, [STATE + 7*SHA256_DIGEST_ROW_SIZE]
-
-    MOVQ  table+16(FP), TBL_P9
-
-    xor IDX, IDX
-
-    // Read in first block of input data
-    MASKED_LOAD( W0,  0, skipInput0)
-    MASKED_LOAD( W1,  1, skipInput1)
-    MASKED_LOAD( W2,  2, skipInput2)
-    MASKED_LOAD( W3,  3, skipInput3)
-    MASKED_LOAD( W4,  4, skipInput4)
-    MASKED_LOAD( W5,  5, skipInput5)
-    MASKED_LOAD( W6,  6, skipInput6)
-    MASKED_LOAD( W7,  7, skipInput7)
-    MASKED_LOAD( W8,  8, skipInput8)
-    MASKED_LOAD( W9,  9, skipInput9)
-    MASKED_LOAD(W10, 10, skipInput10)
-    MASKED_LOAD(W11, 11, skipInput11)
-    MASKED_LOAD(W12, 12, skipInput12)
-    MASKED_LOAD(W13, 13, skipInput13)
-    MASKED_LOAD(W14, 14, skipInput14)
-    MASKED_LOAD(W15, 15, skipInput15)
-
-lloop:
-    LEAQ PSHUFFLE_BYTE_FLIP_MASK<>(SB), TBL_P9
-    vmovdqu32 TMP2, [TBL]
-
-    // Get first K from table
-    MOVQ  table+16(FP), TBL_P9
-    vmovdqu32	TMP3, [TBL]
-
-    // Save digests for later addition
-    vmovdqu32 [SCRATCH + 64*0], A
-    vmovdqu32 [SCRATCH + 64*1], B
-    vmovdqu32 [SCRATCH + 64*2], C
-    vmovdqu32 [SCRATCH + 64*3], D
-    vmovdqu32 [SCRATCH + 64*4], E
-    vmovdqu32 [SCRATCH + 64*5], F
-    vmovdqu32 [SCRATCH + 64*6], G
-    vmovdqu32 [SCRATCH + 64*7], H
-
-    add IDX, 64
-
-    // Transpose input data
-    TRANSPOSE16(W0, W1, W2, W3, W4, W5, W6, W7, W8, W9, W10, W11, W12, W13, W14, W15, TMP0, TMP1)
-
-    vpshufb W0, W0, TMP2
-    vpshufb W1, W1, TMP2
-    vpshufb W2, W2, TMP2
-    vpshufb W3, W3, TMP2
-    vpshufb W4, W4, TMP2
-    vpshufb W5, W5, TMP2
-    vpshufb W6, W6, TMP2
-    vpshufb W7, W7, TMP2
-    vpshufb W8, W8, TMP2
-    vpshufb W9, W9, TMP2
-    vpshufb W10, W10, TMP2
-    vpshufb W11, W11, TMP2
-    vpshufb W12, W12, TMP2
-    vpshufb W13, W13, TMP2
-    vpshufb W14, W14, TMP2
-    vpshufb W15, W15, TMP2
-
-    // MSG Schedule for W0-W15 is now complete in registers
-    // Process first 48 rounds
-    // Calculate next Wt+16 after processing is complete and Wt is unneeded
-
-    PROCESS_LOOP( W0,  0, A, B, C, D, E, F, G, H)
-    MSG_SCHED_ROUND_16_63( W0,  W1,  W9, W14)
-    PROCESS_LOOP( W1,  1, H, A, B, C, D, E, F, G)
-    MSG_SCHED_ROUND_16_63( W1,  W2, W10, W15)
-    PROCESS_LOOP( W2,  2, G, H, A, B, C, D, E, F)
-    MSG_SCHED_ROUND_16_63( W2,  W3, W11,  W0)
-    PROCESS_LOOP( W3,  3, F, G, H, A, B, C, D, E)
-    MSG_SCHED_ROUND_16_63( W3,  W4, W12,  W1)
-    PROCESS_LOOP( W4,  4, E, F, G, H, A, B, C, D)
-    MSG_SCHED_ROUND_16_63( W4,  W5, W13,  W2)
-    PROCESS_LOOP( W5,  5, D, E, F, G, H, A, B, C)
-    MSG_SCHED_ROUND_16_63( W5,  W6, W14,  W3)
-    PROCESS_LOOP( W6,  6, C, D, E, F, G, H, A, B)
-    MSG_SCHED_ROUND_16_63( W6,  W7, W15,  W4)
-    PROCESS_LOOP( W7,  7, B, C, D, E, F, G, H, A)
-    MSG_SCHED_ROUND_16_63( W7,  W8,  W0,  W5)
-    PROCESS_LOOP( W8,  8, A, B, C, D, E, F, G, H)
-    MSG_SCHED_ROUND_16_63( W8,  W9,  W1,  W6)
-    PROCESS_LOOP( W9,  9, H, A, B, C, D, E, F, G)
-    MSG_SCHED_ROUND_16_63( W9, W10,  W2,  W7)
-    PROCESS_LOOP(W10, 10, G, H, A, B, C, D, E, F)
-    MSG_SCHED_ROUND_16_63(W10, W11,  W3,  W8)
-    PROCESS_LOOP(W11, 11, F, G, H, A, B, C, D, E)
-    MSG_SCHED_ROUND_16_63(W11, W12,  W4,  W9)
-    PROCESS_LOOP(W12, 12, E, F, G, H, A, B, C, D)
-    MSG_SCHED_ROUND_16_63(W12, W13,  W5, W10)
-    PROCESS_LOOP(W13, 13, D, E, F, G, H, A, B, C)
-    MSG_SCHED_ROUND_16_63(W13, W14,  W6, W11)
-    PROCESS_LOOP(W14, 14, C, D, E, F, G, H, A, B)
-    MSG_SCHED_ROUND_16_63(W14, W15,  W7, W12)
-    PROCESS_LOOP(W15, 15, B, C, D, E, F, G, H, A)
-    MSG_SCHED_ROUND_16_63(W15,  W0,  W8, W13)
-    PROCESS_LOOP( W0, 16, A, B, C, D, E, F, G, H)
-    MSG_SCHED_ROUND_16_63( W0,  W1,  W9, W14)
-    PROCESS_LOOP( W1, 17, H, A, B, C, D, E, F, G)
-    MSG_SCHED_ROUND_16_63( W1,  W2, W10, W15)
-    PROCESS_LOOP( W2, 18, G, H, A, B, C, D, E, F)
-    MSG_SCHED_ROUND_16_63( W2,  W3, W11,  W0)
-    PROCESS_LOOP( W3, 19, F, G, H, A, B, C, D, E)
-    MSG_SCHED_ROUND_16_63( W3,  W4, W12,  W1)
-    PROCESS_LOOP( W4, 20, E, F, G, H, A, B, C, D)
-    MSG_SCHED_ROUND_16_63( W4,  W5, W13,  W2)
-    PROCESS_LOOP( W5, 21, D, E, F, G, H, A, B, C)
-    MSG_SCHED_ROUND_16_63( W5,  W6, W14,  W3)
-    PROCESS_LOOP( W6, 22, C, D, E, F, G, H, A, B)
-    MSG_SCHED_ROUND_16_63( W6,  W7, W15,  W4)
-    PROCESS_LOOP( W7, 23, B, C, D, E, F, G, H, A)
-    MSG_SCHED_ROUND_16_63( W7,  W8,  W0,  W5)
-    PROCESS_LOOP( W8, 24, A, B, C, D, E, F, G, H)
-    MSG_SCHED_ROUND_16_63( W8,  W9,  W1,  W6)
-    PROCESS_LOOP( W9, 25, H, A, B, C, D, E, F, G)
-    MSG_SCHED_ROUND_16_63( W9, W10,  W2,  W7)
-    PROCESS_LOOP(W10, 26, G, H, A, B, C, D, E, F)
-    MSG_SCHED_ROUND_16_63(W10, W11,  W3,  W8)
-    PROCESS_LOOP(W11, 27, F, G, H, A, B, C, D, E)
-    MSG_SCHED_ROUND_16_63(W11, W12,  W4,  W9)
-    PROCESS_LOOP(W12, 28, E, F, G, H, A, B, C, D)
-    MSG_SCHED_ROUND_16_63(W12, W13,  W5, W10)
-    PROCESS_LOOP(W13, 29, D, E, F, G, H, A, B, C)
-    MSG_SCHED_ROUND_16_63(W13, W14,  W6, W11)
-    PROCESS_LOOP(W14, 30, C, D, E, F, G, H, A, B)
-    MSG_SCHED_ROUND_16_63(W14, W15,  W7, W12)
-    PROCESS_LOOP(W15, 31, B, C, D, E, F, G, H, A)
-    MSG_SCHED_ROUND_16_63(W15,  W0,  W8, W13)
-    PROCESS_LOOP( W0, 32, A, B, C, D, E, F, G, H)
-    MSG_SCHED_ROUND_16_63( W0,  W1,  W9, W14)
-    PROCESS_LOOP( W1, 33, H, A, B, C, D, E, F, G)
-    MSG_SCHED_ROUND_16_63( W1,  W2, W10, W15)
-    PROCESS_LOOP( W2, 34, G, H, A, B, C, D, E, F)
-    MSG_SCHED_ROUND_16_63( W2,  W3, W11,  W0)
-    PROCESS_LOOP( W3, 35, F, G, H, A, B, C, D, E)
-    MSG_SCHED_ROUND_16_63( W3,  W4, W12,  W1)
-    PROCESS_LOOP( W4, 36, E, F, G, H, A, B, C, D)
-    MSG_SCHED_ROUND_16_63( W4,  W5, W13,  W2)
-    PROCESS_LOOP( W5, 37, D, E, F, G, H, A, B, C)
-    MSG_SCHED_ROUND_16_63( W5,  W6, W14,  W3)
-    PROCESS_LOOP( W6, 38, C, D, E, F, G, H, A, B)
-    MSG_SCHED_ROUND_16_63( W6,  W7, W15,  W4)
-    PROCESS_LOOP( W7, 39, B, C, D, E, F, G, H, A)
-    MSG_SCHED_ROUND_16_63( W7,  W8,  W0,  W5)
-    PROCESS_LOOP( W8, 40, A, B, C, D, E, F, G, H)
-    MSG_SCHED_ROUND_16_63( W8,  W9,  W1,  W6)
-    PROCESS_LOOP( W9, 41, H, A, B, C, D, E, F, G)
-    MSG_SCHED_ROUND_16_63( W9, W10,  W2,  W7)
-    PROCESS_LOOP(W10, 42, G, H, A, B, C, D, E, F)
-    MSG_SCHED_ROUND_16_63(W10, W11,  W3,  W8)
-    PROCESS_LOOP(W11, 43, F, G, H, A, B, C, D, E)
-    MSG_SCHED_ROUND_16_63(W11, W12,  W4,  W9)
-    PROCESS_LOOP(W12, 44, E, F, G, H, A, B, C, D)
-    MSG_SCHED_ROUND_16_63(W12, W13,  W5, W10)
-    PROCESS_LOOP(W13, 45, D, E, F, G, H, A, B, C)
-    MSG_SCHED_ROUND_16_63(W13, W14,  W6, W11)
-    PROCESS_LOOP(W14, 46, C, D, E, F, G, H, A, B)
-    MSG_SCHED_ROUND_16_63(W14, W15,  W7, W12)
-    PROCESS_LOOP(W15, 47, B, C, D, E, F, G, H, A)
-    MSG_SCHED_ROUND_16_63(W15,  W0,  W8, W13)
-
-    // Check if this is the last block
-    sub INP_SIZE, 1
-    JE  lastLoop
-
-    // Load next mask for inputs
-    ADDQ $8, MASKP_P9
-    MOVQ (MASKP_P9), MASK_P9
-
-    // Process last 16 rounds
-    // Read in next block msg data for use in first 16 words of msg sched
-
-    PROCESS_LOOP( W0, 48, A, B, C, D, E, F, G, H)
-    MSG_SCHED_ROUND_00_15( W0,  0, skipNext0)
-    PROCESS_LOOP( W1, 49, H, A, B, C, D, E, F, G)
-    MSG_SCHED_ROUND_00_15( W1,  1, skipNext1)
-    PROCESS_LOOP( W2, 50, G, H, A, B, C, D, E, F)
-    MSG_SCHED_ROUND_00_15( W2,  2, skipNext2)
-    PROCESS_LOOP( W3, 51, F, G, H, A, B, C, D, E)
-    MSG_SCHED_ROUND_00_15( W3,  3, skipNext3)
-    PROCESS_LOOP( W4, 52, E, F, G, H, A, B, C, D)
-    MSG_SCHED_ROUND_00_15( W4,  4, skipNext4)
-    PROCESS_LOOP( W5, 53, D, E, F, G, H, A, B, C)
-    MSG_SCHED_ROUND_00_15( W5,  5, skipNext5)
-    PROCESS_LOOP( W6, 54, C, D, E, F, G, H, A, B)
-    MSG_SCHED_ROUND_00_15( W6,  6, skipNext6)
-    PROCESS_LOOP( W7, 55, B, C, D, E, F, G, H, A)
-    MSG_SCHED_ROUND_00_15( W7,  7, skipNext7)
-    PROCESS_LOOP( W8, 56, A, B, C, D, E, F, G, H)
-    MSG_SCHED_ROUND_00_15( W8,  8, skipNext8)
-    PROCESS_LOOP( W9, 57, H, A, B, C, D, E, F, G)
-    MSG_SCHED_ROUND_00_15( W9,  9, skipNext9)
-    PROCESS_LOOP(W10, 58, G, H, A, B, C, D, E, F)
-    MSG_SCHED_ROUND_00_15(W10, 10, skipNext10)
-    PROCESS_LOOP(W11, 59, F, G, H, A, B, C, D, E)
-    MSG_SCHED_ROUND_00_15(W11, 11, skipNext11)
-    PROCESS_LOOP(W12, 60, E, F, G, H, A, B, C, D)
-    MSG_SCHED_ROUND_00_15(W12, 12, skipNext12)
-    PROCESS_LOOP(W13, 61, D, E, F, G, H, A, B, C)
-    MSG_SCHED_ROUND_00_15(W13, 13, skipNext13)
-    PROCESS_LOOP(W14, 62, C, D, E, F, G, H, A, B)
-    MSG_SCHED_ROUND_00_15(W14, 14, skipNext14)
-    PROCESS_LOOP(W15, 63, B, C, D, E, F, G, H, A)
-    MSG_SCHED_ROUND_00_15(W15, 15, skipNext15)
-
-    // Add old digest
-    vmovdqu32  TMP2, A
-    vmovdqu32 A, [SCRATCH + 64*0]
-    vpaddd A{k1}, A, TMP2
-    vmovdqu32  TMP2, B
-    vmovdqu32 B, [SCRATCH + 64*1]
-    vpaddd B{k1}, B, TMP2
-    vmovdqu32  TMP2, C
-    vmovdqu32 C, [SCRATCH + 64*2]
-    vpaddd C{k1}, C, TMP2
-    vmovdqu32  TMP2, D
-    vmovdqu32 D, [SCRATCH + 64*3]
-    vpaddd D{k1}, D, TMP2
-    vmovdqu32  TMP2, E
-    vmovdqu32 E, [SCRATCH + 64*4]
-    vpaddd E{k1}, E, TMP2
-    vmovdqu32  TMP2, F
-    vmovdqu32 F, [SCRATCH + 64*5]
-    vpaddd F{k1}, F, TMP2
-    vmovdqu32  TMP2, G
-    vmovdqu32 G, [SCRATCH + 64*6]
-    vpaddd G{k1}, G, TMP2
-    vmovdqu32  TMP2, H
-    vmovdqu32 H, [SCRATCH + 64*7]
-    vpaddd H{k1}, H, TMP2
-
-    kmovq k1, mask
-    JMP lloop
-
-lastLoop:
-    // Process last 16 rounds
-    PROCESS_LOOP( W0, 48, A, B, C, D, E, F, G, H)
-    PROCESS_LOOP( W1, 49, H, A, B, C, D, E, F, G)
-    PROCESS_LOOP( W2, 50, G, H, A, B, C, D, E, F)
-    PROCESS_LOOP( W3, 51, F, G, H, A, B, C, D, E)
-    PROCESS_LOOP( W4, 52, E, F, G, H, A, B, C, D)
-    PROCESS_LOOP( W5, 53, D, E, F, G, H, A, B, C)
-    PROCESS_LOOP( W6, 54, C, D, E, F, G, H, A, B)
-    PROCESS_LOOP( W7, 55, B, C, D, E, F, G, H, A)
-    PROCESS_LOOP( W8, 56, A, B, C, D, E, F, G, H)
-    PROCESS_LOOP( W9, 57, H, A, B, C, D, E, F, G)
-    PROCESS_LOOP(W10, 58, G, H, A, B, C, D, E, F)
-    PROCESS_LOOP(W11, 59, F, G, H, A, B, C, D, E)
-    PROCESS_LOOP(W12, 60, E, F, G, H, A, B, C, D)
-    PROCESS_LOOP(W13, 61, D, E, F, G, H, A, B, C)
-    PROCESS_LOOP(W14, 62, C, D, E, F, G, H, A, B)
-    PROCESS_LOOP(W15, 63, B, C, D, E, F, G, H, A)
-
-    // Add old digest
-    vmovdqu32  TMP2, A
-    vmovdqu32 A, [SCRATCH + 64*0]
-    vpaddd A{k1}, A, TMP2
-    vmovdqu32  TMP2, B
-    vmovdqu32 B, [SCRATCH + 64*1]
-    vpaddd B{k1}, B, TMP2
-    vmovdqu32  TMP2, C
-    vmovdqu32 C, [SCRATCH + 64*2]
-    vpaddd C{k1}, C, TMP2
-    vmovdqu32  TMP2, D
-    vmovdqu32 D, [SCRATCH + 64*3]
-    vpaddd D{k1}, D, TMP2
-    vmovdqu32  TMP2, E
-    vmovdqu32 E, [SCRATCH + 64*4]
-    vpaddd E{k1}, E, TMP2
-    vmovdqu32  TMP2, F
-    vmovdqu32 F, [SCRATCH + 64*5]
-    vpaddd F{k1}, F, TMP2
-    vmovdqu32  TMP2, G
-    vmovdqu32 G, [SCRATCH + 64*6]
-    vpaddd G{k1}, G, TMP2
-    vmovdqu32  TMP2, H
-    vmovdqu32 H, [SCRATCH + 64*7]
-    vpaddd H{k1}, H, TMP2
-
-    // Write out digest
-    vmovdqu32 [STATE + 0*SHA256_DIGEST_ROW_SIZE], A
-    vmovdqu32 [STATE + 1*SHA256_DIGEST_ROW_SIZE], B
-    vmovdqu32 [STATE + 2*SHA256_DIGEST_ROW_SIZE], C
-    vmovdqu32 [STATE + 3*SHA256_DIGEST_ROW_SIZE], D
-    vmovdqu32 [STATE + 4*SHA256_DIGEST_ROW_SIZE], E
-    vmovdqu32 [STATE + 5*SHA256_DIGEST_ROW_SIZE], F
-    vmovdqu32 [STATE + 6*SHA256_DIGEST_ROW_SIZE], G
-    vmovdqu32 [STATE + 7*SHA256_DIGEST_ROW_SIZE], H
-
-    VZEROUPPER
-    RET
-
-//
-// Tables
-//
-
-DATA PSHUFFLE_BYTE_FLIP_MASK<>+0x000(SB)/8, $0x0405060700010203
-DATA PSHUFFLE_BYTE_FLIP_MASK<>+0x008(SB)/8, $0x0c0d0e0f08090a0b
-DATA PSHUFFLE_BYTE_FLIP_MASK<>+0x010(SB)/8, $0x0405060700010203
-DATA PSHUFFLE_BYTE_FLIP_MASK<>+0x018(SB)/8, $0x0c0d0e0f08090a0b
-DATA PSHUFFLE_BYTE_FLIP_MASK<>+0x020(SB)/8, $0x0405060700010203
-DATA PSHUFFLE_BYTE_FLIP_MASK<>+0x028(SB)/8, $0x0c0d0e0f08090a0b
-DATA PSHUFFLE_BYTE_FLIP_MASK<>+0x030(SB)/8, $0x0405060700010203
-DATA PSHUFFLE_BYTE_FLIP_MASK<>+0x038(SB)/8, $0x0c0d0e0f08090a0b
-GLOBL PSHUFFLE_BYTE_FLIP_MASK<>(SB), 8, $64
-
-DATA PSHUFFLE_TRANSPOSE16_MASK1<>+0x000(SB)/8, $0x0000000000000000
-DATA PSHUFFLE_TRANSPOSE16_MASK1<>+0x008(SB)/8, $0x0000000000000001
-DATA PSHUFFLE_TRANSPOSE16_MASK1<>+0x010(SB)/8, $0x0000000000000008
-DATA PSHUFFLE_TRANSPOSE16_MASK1<>+0x018(SB)/8, $0x0000000000000009
-DATA PSHUFFLE_TRANSPOSE16_MASK1<>+0x020(SB)/8, $0x0000000000000004
-DATA PSHUFFLE_TRANSPOSE16_MASK1<>+0x028(SB)/8, $0x0000000000000005
-DATA PSHUFFLE_TRANSPOSE16_MASK1<>+0x030(SB)/8, $0x000000000000000C
-DATA PSHUFFLE_TRANSPOSE16_MASK1<>+0x038(SB)/8, $0x000000000000000D
-GLOBL PSHUFFLE_TRANSPOSE16_MASK1<>(SB), 8, $64
-
-DATA PSHUFFLE_TRANSPOSE16_MASK2<>+0x000(SB)/8, $0x0000000000000002
-DATA PSHUFFLE_TRANSPOSE16_MASK2<>+0x008(SB)/8, $0x0000000000000003
-DATA PSHUFFLE_TRANSPOSE16_MASK2<>+0x010(SB)/8, $0x000000000000000A
-DATA PSHUFFLE_TRANSPOSE16_MASK2<>+0x018(SB)/8, $0x000000000000000B
-DATA PSHUFFLE_TRANSPOSE16_MASK2<>+0x020(SB)/8, $0x0000000000000006
-DATA PSHUFFLE_TRANSPOSE16_MASK2<>+0x028(SB)/8, $0x0000000000000007
-DATA PSHUFFLE_TRANSPOSE16_MASK2<>+0x030(SB)/8, $0x000000000000000E
-DATA PSHUFFLE_TRANSPOSE16_MASK2<>+0x038(SB)/8, $0x000000000000000F
-GLOBL PSHUFFLE_TRANSPOSE16_MASK2<>(SB), 8, $64
diff --git a/vendor/github.com/minio/sha256-simd/sha256blockAvx512_amd64.go b/vendor/github.com/minio/sha256-simd/sha256blockAvx512_amd64.go
deleted file mode 100644
index 4b9473a4..00000000
--- a/vendor/github.com/minio/sha256-simd/sha256blockAvx512_amd64.go
+++ /dev/null
@@ -1,501 +0,0 @@
-//go:build !noasm && !appengine && gc
-// +build !noasm,!appengine,gc
-
-/*
- * Minio Cloud Storage, (C) 2017 Minio, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package sha256
-
-import (
-	"encoding/binary"
-	"errors"
-	"hash"
-	"sort"
-	"sync/atomic"
-	"time"
-)
-
-//go:noescape
-func sha256X16Avx512(digests *[512]byte, scratch *[512]byte, table *[512]uint64, mask []uint64, inputs [16][]byte)
-
-// Avx512ServerUID - Do not start at 0 but next multiple of 16 so as to be able to
-// differentiate with default initialiation value of 0
-const Avx512ServerUID = 16
-
-var uidCounter uint64
-
-// NewAvx512 - initialize sha256 Avx512 implementation.
-func NewAvx512(a512srv *Avx512Server) hash.Hash {
-	uid := atomic.AddUint64(&uidCounter, 1)
-	return &Avx512Digest{uid: uid, a512srv: a512srv}
-}
-
-// Avx512Digest - Type for computing SHA256 using Avx512
-type Avx512Digest struct {
-	uid     uint64
-	a512srv *Avx512Server
-	x       [chunk]byte
-	nx      int
-	len     uint64
-	final   bool
-	result  [Size]byte
-}
-
-// Size - Return size of checksum
-func (d *Avx512Digest) Size() int { return Size }
-
-// BlockSize - Return blocksize of checksum
-func (d Avx512Digest) BlockSize() int { return BlockSize }
-
-// Reset - reset sha digest to its initial values
-func (d *Avx512Digest) Reset() {
-	d.a512srv.blocksCh <- blockInput{uid: d.uid, reset: true}
-	d.nx = 0
-	d.len = 0
-	d.final = false
-}
-
-// Write to digest
-func (d *Avx512Digest) Write(p []byte) (nn int, err error) {
-
-	if d.final {
-		return 0, errors.New("Avx512Digest already finalized. Reset first before writing again")
-	}
-
-	nn = len(p)
-	d.len += uint64(nn)
-	if d.nx > 0 {
-		n := copy(d.x[d.nx:], p)
-		d.nx += n
-		if d.nx == chunk {
-			d.a512srv.blocksCh <- blockInput{uid: d.uid, msg: d.x[:]}
-			d.nx = 0
-		}
-		p = p[n:]
-	}
-	if len(p) >= chunk {
-		n := len(p) &^ (chunk - 1)
-		d.a512srv.blocksCh <- blockInput{uid: d.uid, msg: p[:n]}
-		p = p[n:]
-	}
-	if len(p) > 0 {
-		d.nx = copy(d.x[:], p)
-	}
-	return
-}
-
-// Sum - Return sha256 sum in bytes
-func (d *Avx512Digest) Sum(in []byte) (result []byte) {
-
-	if d.final {
-		return append(in, d.result[:]...)
-	}
-
-	trail := make([]byte, 0, 128)
-	trail = append(trail, d.x[:d.nx]...)
-
-	len := d.len
-	// Padding.  Add a 1 bit and 0 bits until 56 bytes mod 64.
-	var tmp [64]byte
-	tmp[0] = 0x80
-	if len%64 < 56 {
-		trail = append(trail, tmp[0:56-len%64]...)
-	} else {
-		trail = append(trail, tmp[0:64+56-len%64]...)
-	}
-	d.nx = 0
-
-	// Length in bits.
-	len <<= 3
-	for i := uint(0); i < 8; i++ {
-		tmp[i] = byte(len >> (56 - 8*i))
-	}
-	trail = append(trail, tmp[0:8]...)
-
-	sumCh := make(chan [Size]byte)
-	d.a512srv.blocksCh <- blockInput{uid: d.uid, msg: trail, final: true, sumCh: sumCh}
-	d.result = <-sumCh
-	d.final = true
-	return append(in, d.result[:]...)
-}
-
-var table = [512]uint64{
-	0x428a2f98428a2f98, 0x428a2f98428a2f98, 0x428a2f98428a2f98, 0x428a2f98428a2f98,
-	0x428a2f98428a2f98, 0x428a2f98428a2f98, 0x428a2f98428a2f98, 0x428a2f98428a2f98,
-	0x7137449171374491, 0x7137449171374491, 0x7137449171374491, 0x7137449171374491,
-	0x7137449171374491, 0x7137449171374491, 0x7137449171374491, 0x7137449171374491,
-	0xb5c0fbcfb5c0fbcf, 0xb5c0fbcfb5c0fbcf, 0xb5c0fbcfb5c0fbcf, 0xb5c0fbcfb5c0fbcf,
-	0xb5c0fbcfb5c0fbcf, 0xb5c0fbcfb5c0fbcf, 0xb5c0fbcfb5c0fbcf, 0xb5c0fbcfb5c0fbcf,
-	0xe9b5dba5e9b5dba5, 0xe9b5dba5e9b5dba5, 0xe9b5dba5e9b5dba5, 0xe9b5dba5e9b5dba5,
-	0xe9b5dba5e9b5dba5, 0xe9b5dba5e9b5dba5, 0xe9b5dba5e9b5dba5, 0xe9b5dba5e9b5dba5,
-	0x3956c25b3956c25b, 0x3956c25b3956c25b, 0x3956c25b3956c25b, 0x3956c25b3956c25b,
-	0x3956c25b3956c25b, 0x3956c25b3956c25b, 0x3956c25b3956c25b, 0x3956c25b3956c25b,
-	0x59f111f159f111f1, 0x59f111f159f111f1, 0x59f111f159f111f1, 0x59f111f159f111f1,
-	0x59f111f159f111f1, 0x59f111f159f111f1, 0x59f111f159f111f1, 0x59f111f159f111f1,
-	0x923f82a4923f82a4, 0x923f82a4923f82a4, 0x923f82a4923f82a4, 0x923f82a4923f82a4,
-	0x923f82a4923f82a4, 0x923f82a4923f82a4, 0x923f82a4923f82a4, 0x923f82a4923f82a4,
-	0xab1c5ed5ab1c5ed5, 0xab1c5ed5ab1c5ed5, 0xab1c5ed5ab1c5ed5, 0xab1c5ed5ab1c5ed5,
-	0xab1c5ed5ab1c5ed5, 0xab1c5ed5ab1c5ed5, 0xab1c5ed5ab1c5ed5, 0xab1c5ed5ab1c5ed5,
-	0xd807aa98d807aa98, 0xd807aa98d807aa98, 0xd807aa98d807aa98, 0xd807aa98d807aa98,
-	0xd807aa98d807aa98, 0xd807aa98d807aa98, 0xd807aa98d807aa98, 0xd807aa98d807aa98,
-	0x12835b0112835b01, 0x12835b0112835b01, 0x12835b0112835b01, 0x12835b0112835b01,
-	0x12835b0112835b01, 0x12835b0112835b01, 0x12835b0112835b01, 0x12835b0112835b01,
-	0x243185be243185be, 0x243185be243185be, 0x243185be243185be, 0x243185be243185be,
-	0x243185be243185be, 0x243185be243185be, 0x243185be243185be, 0x243185be243185be,
-	0x550c7dc3550c7dc3, 0x550c7dc3550c7dc3, 0x550c7dc3550c7dc3, 0x550c7dc3550c7dc3,
-	0x550c7dc3550c7dc3, 0x550c7dc3550c7dc3, 0x550c7dc3550c7dc3, 0x550c7dc3550c7dc3,
-	0x72be5d7472be5d74, 0x72be5d7472be5d74, 0x72be5d7472be5d74, 0x72be5d7472be5d74,
-	0x72be5d7472be5d74, 0x72be5d7472be5d74, 0x72be5d7472be5d74, 0x72be5d7472be5d74,
-	0x80deb1fe80deb1fe, 0x80deb1fe80deb1fe, 0x80deb1fe80deb1fe, 0x80deb1fe80deb1fe,
-	0x80deb1fe80deb1fe, 0x80deb1fe80deb1fe, 0x80deb1fe80deb1fe, 0x80deb1fe80deb1fe,
-	0x9bdc06a79bdc06a7, 0x9bdc06a79bdc06a7, 0x9bdc06a79bdc06a7, 0x9bdc06a79bdc06a7,
-	0x9bdc06a79bdc06a7, 0x9bdc06a79bdc06a7, 0x9bdc06a79bdc06a7, 0x9bdc06a79bdc06a7,
-	0xc19bf174c19bf174, 0xc19bf174c19bf174, 0xc19bf174c19bf174, 0xc19bf174c19bf174,
-	0xc19bf174c19bf174, 0xc19bf174c19bf174, 0xc19bf174c19bf174, 0xc19bf174c19bf174,
-	0xe49b69c1e49b69c1, 0xe49b69c1e49b69c1, 0xe49b69c1e49b69c1, 0xe49b69c1e49b69c1,
-	0xe49b69c1e49b69c1, 0xe49b69c1e49b69c1, 0xe49b69c1e49b69c1, 0xe49b69c1e49b69c1,
-	0xefbe4786efbe4786, 0xefbe4786efbe4786, 0xefbe4786efbe4786, 0xefbe4786efbe4786,
-	0xefbe4786efbe4786, 0xefbe4786efbe4786, 0xefbe4786efbe4786, 0xefbe4786efbe4786,
-	0x0fc19dc60fc19dc6, 0x0fc19dc60fc19dc6, 0x0fc19dc60fc19dc6, 0x0fc19dc60fc19dc6,
-	0x0fc19dc60fc19dc6, 0x0fc19dc60fc19dc6, 0x0fc19dc60fc19dc6, 0x0fc19dc60fc19dc6,
-	0x240ca1cc240ca1cc, 0x240ca1cc240ca1cc, 0x240ca1cc240ca1cc, 0x240ca1cc240ca1cc,
-	0x240ca1cc240ca1cc, 0x240ca1cc240ca1cc, 0x240ca1cc240ca1cc, 0x240ca1cc240ca1cc,
-	0x2de92c6f2de92c6f, 0x2de92c6f2de92c6f, 0x2de92c6f2de92c6f, 0x2de92c6f2de92c6f,
-	0x2de92c6f2de92c6f, 0x2de92c6f2de92c6f, 0x2de92c6f2de92c6f, 0x2de92c6f2de92c6f,
-	0x4a7484aa4a7484aa, 0x4a7484aa4a7484aa, 0x4a7484aa4a7484aa, 0x4a7484aa4a7484aa,
-	0x4a7484aa4a7484aa, 0x4a7484aa4a7484aa, 0x4a7484aa4a7484aa, 0x4a7484aa4a7484aa,
-	0x5cb0a9dc5cb0a9dc, 0x5cb0a9dc5cb0a9dc, 0x5cb0a9dc5cb0a9dc, 0x5cb0a9dc5cb0a9dc,
-	0x5cb0a9dc5cb0a9dc, 0x5cb0a9dc5cb0a9dc, 0x5cb0a9dc5cb0a9dc, 0x5cb0a9dc5cb0a9dc,
-	0x76f988da76f988da, 0x76f988da76f988da, 0x76f988da76f988da, 0x76f988da76f988da,
-	0x76f988da76f988da, 0x76f988da76f988da, 0x76f988da76f988da, 0x76f988da76f988da,
-	0x983e5152983e5152, 0x983e5152983e5152, 0x983e5152983e5152, 0x983e5152983e5152,
-	0x983e5152983e5152, 0x983e5152983e5152, 0x983e5152983e5152, 0x983e5152983e5152,
-	0xa831c66da831c66d, 0xa831c66da831c66d, 0xa831c66da831c66d, 0xa831c66da831c66d,
-	0xa831c66da831c66d, 0xa831c66da831c66d, 0xa831c66da831c66d, 0xa831c66da831c66d,
-	0xb00327c8b00327c8, 0xb00327c8b00327c8, 0xb00327c8b00327c8, 0xb00327c8b00327c8,
-	0xb00327c8b00327c8, 0xb00327c8b00327c8, 0xb00327c8b00327c8, 0xb00327c8b00327c8,
-	0xbf597fc7bf597fc7, 0xbf597fc7bf597fc7, 0xbf597fc7bf597fc7, 0xbf597fc7bf597fc7,
-	0xbf597fc7bf597fc7, 0xbf597fc7bf597fc7, 0xbf597fc7bf597fc7, 0xbf597fc7bf597fc7,
-	0xc6e00bf3c6e00bf3, 0xc6e00bf3c6e00bf3, 0xc6e00bf3c6e00bf3, 0xc6e00bf3c6e00bf3,
-	0xc6e00bf3c6e00bf3, 0xc6e00bf3c6e00bf3, 0xc6e00bf3c6e00bf3, 0xc6e00bf3c6e00bf3,
-	0xd5a79147d5a79147, 0xd5a79147d5a79147, 0xd5a79147d5a79147, 0xd5a79147d5a79147,
-	0xd5a79147d5a79147, 0xd5a79147d5a79147, 0xd5a79147d5a79147, 0xd5a79147d5a79147,
-	0x06ca635106ca6351, 0x06ca635106ca6351, 0x06ca635106ca6351, 0x06ca635106ca6351,
-	0x06ca635106ca6351, 0x06ca635106ca6351, 0x06ca635106ca6351, 0x06ca635106ca6351,
-	0x1429296714292967, 0x1429296714292967, 0x1429296714292967, 0x1429296714292967,
-	0x1429296714292967, 0x1429296714292967, 0x1429296714292967, 0x1429296714292967,
-	0x27b70a8527b70a85, 0x27b70a8527b70a85, 0x27b70a8527b70a85, 0x27b70a8527b70a85,
-	0x27b70a8527b70a85, 0x27b70a8527b70a85, 0x27b70a8527b70a85, 0x27b70a8527b70a85,
-	0x2e1b21382e1b2138, 0x2e1b21382e1b2138, 0x2e1b21382e1b2138, 0x2e1b21382e1b2138,
-	0x2e1b21382e1b2138, 0x2e1b21382e1b2138, 0x2e1b21382e1b2138, 0x2e1b21382e1b2138,
-	0x4d2c6dfc4d2c6dfc, 0x4d2c6dfc4d2c6dfc, 0x4d2c6dfc4d2c6dfc, 0x4d2c6dfc4d2c6dfc,
-	0x4d2c6dfc4d2c6dfc, 0x4d2c6dfc4d2c6dfc, 0x4d2c6dfc4d2c6dfc, 0x4d2c6dfc4d2c6dfc,
-	0x53380d1353380d13, 0x53380d1353380d13, 0x53380d1353380d13, 0x53380d1353380d13,
-	0x53380d1353380d13, 0x53380d1353380d13, 0x53380d1353380d13, 0x53380d1353380d13,
-	0x650a7354650a7354, 0x650a7354650a7354, 0x650a7354650a7354, 0x650a7354650a7354,
-	0x650a7354650a7354, 0x650a7354650a7354, 0x650a7354650a7354, 0x650a7354650a7354,
-	0x766a0abb766a0abb, 0x766a0abb766a0abb, 0x766a0abb766a0abb, 0x766a0abb766a0abb,
-	0x766a0abb766a0abb, 0x766a0abb766a0abb, 0x766a0abb766a0abb, 0x766a0abb766a0abb,
-	0x81c2c92e81c2c92e, 0x81c2c92e81c2c92e, 0x81c2c92e81c2c92e, 0x81c2c92e81c2c92e,
-	0x81c2c92e81c2c92e, 0x81c2c92e81c2c92e, 0x81c2c92e81c2c92e, 0x81c2c92e81c2c92e,
-	0x92722c8592722c85, 0x92722c8592722c85, 0x92722c8592722c85, 0x92722c8592722c85,
-	0x92722c8592722c85, 0x92722c8592722c85, 0x92722c8592722c85, 0x92722c8592722c85,
-	0xa2bfe8a1a2bfe8a1, 0xa2bfe8a1a2bfe8a1, 0xa2bfe8a1a2bfe8a1, 0xa2bfe8a1a2bfe8a1,
-	0xa2bfe8a1a2bfe8a1, 0xa2bfe8a1a2bfe8a1, 0xa2bfe8a1a2bfe8a1, 0xa2bfe8a1a2bfe8a1,
-	0xa81a664ba81a664b, 0xa81a664ba81a664b, 0xa81a664ba81a664b, 0xa81a664ba81a664b,
-	0xa81a664ba81a664b, 0xa81a664ba81a664b, 0xa81a664ba81a664b, 0xa81a664ba81a664b,
-	0xc24b8b70c24b8b70, 0xc24b8b70c24b8b70, 0xc24b8b70c24b8b70, 0xc24b8b70c24b8b70,
-	0xc24b8b70c24b8b70, 0xc24b8b70c24b8b70, 0xc24b8b70c24b8b70, 0xc24b8b70c24b8b70,
-	0xc76c51a3c76c51a3, 0xc76c51a3c76c51a3, 0xc76c51a3c76c51a3, 0xc76c51a3c76c51a3,
-	0xc76c51a3c76c51a3, 0xc76c51a3c76c51a3, 0xc76c51a3c76c51a3, 0xc76c51a3c76c51a3,
-	0xd192e819d192e819, 0xd192e819d192e819, 0xd192e819d192e819, 0xd192e819d192e819,
-	0xd192e819d192e819, 0xd192e819d192e819, 0xd192e819d192e819, 0xd192e819d192e819,
-	0xd6990624d6990624, 0xd6990624d6990624, 0xd6990624d6990624, 0xd6990624d6990624,
-	0xd6990624d6990624, 0xd6990624d6990624, 0xd6990624d6990624, 0xd6990624d6990624,
-	0xf40e3585f40e3585, 0xf40e3585f40e3585, 0xf40e3585f40e3585, 0xf40e3585f40e3585,
-	0xf40e3585f40e3585, 0xf40e3585f40e3585, 0xf40e3585f40e3585, 0xf40e3585f40e3585,
-	0x106aa070106aa070, 0x106aa070106aa070, 0x106aa070106aa070, 0x106aa070106aa070,
-	0x106aa070106aa070, 0x106aa070106aa070, 0x106aa070106aa070, 0x106aa070106aa070,
-	0x19a4c11619a4c116, 0x19a4c11619a4c116, 0x19a4c11619a4c116, 0x19a4c11619a4c116,
-	0x19a4c11619a4c116, 0x19a4c11619a4c116, 0x19a4c11619a4c116, 0x19a4c11619a4c116,
-	0x1e376c081e376c08, 0x1e376c081e376c08, 0x1e376c081e376c08, 0x1e376c081e376c08,
-	0x1e376c081e376c08, 0x1e376c081e376c08, 0x1e376c081e376c08, 0x1e376c081e376c08,
-	0x2748774c2748774c, 0x2748774c2748774c, 0x2748774c2748774c, 0x2748774c2748774c,
-	0x2748774c2748774c, 0x2748774c2748774c, 0x2748774c2748774c, 0x2748774c2748774c,
-	0x34b0bcb534b0bcb5, 0x34b0bcb534b0bcb5, 0x34b0bcb534b0bcb5, 0x34b0bcb534b0bcb5,
-	0x34b0bcb534b0bcb5, 0x34b0bcb534b0bcb5, 0x34b0bcb534b0bcb5, 0x34b0bcb534b0bcb5,
-	0x391c0cb3391c0cb3, 0x391c0cb3391c0cb3, 0x391c0cb3391c0cb3, 0x391c0cb3391c0cb3,
-	0x391c0cb3391c0cb3, 0x391c0cb3391c0cb3, 0x391c0cb3391c0cb3, 0x391c0cb3391c0cb3,
-	0x4ed8aa4a4ed8aa4a, 0x4ed8aa4a4ed8aa4a, 0x4ed8aa4a4ed8aa4a, 0x4ed8aa4a4ed8aa4a,
-	0x4ed8aa4a4ed8aa4a, 0x4ed8aa4a4ed8aa4a, 0x4ed8aa4a4ed8aa4a, 0x4ed8aa4a4ed8aa4a,
-	0x5b9cca4f5b9cca4f, 0x5b9cca4f5b9cca4f, 0x5b9cca4f5b9cca4f, 0x5b9cca4f5b9cca4f,
-	0x5b9cca4f5b9cca4f, 0x5b9cca4f5b9cca4f, 0x5b9cca4f5b9cca4f, 0x5b9cca4f5b9cca4f,
-	0x682e6ff3682e6ff3, 0x682e6ff3682e6ff3, 0x682e6ff3682e6ff3, 0x682e6ff3682e6ff3,
-	0x682e6ff3682e6ff3, 0x682e6ff3682e6ff3, 0x682e6ff3682e6ff3, 0x682e6ff3682e6ff3,
-	0x748f82ee748f82ee, 0x748f82ee748f82ee, 0x748f82ee748f82ee, 0x748f82ee748f82ee,
-	0x748f82ee748f82ee, 0x748f82ee748f82ee, 0x748f82ee748f82ee, 0x748f82ee748f82ee,
-	0x78a5636f78a5636f, 0x78a5636f78a5636f, 0x78a5636f78a5636f, 0x78a5636f78a5636f,
-	0x78a5636f78a5636f, 0x78a5636f78a5636f, 0x78a5636f78a5636f, 0x78a5636f78a5636f,
-	0x84c8781484c87814, 0x84c8781484c87814, 0x84c8781484c87814, 0x84c8781484c87814,
-	0x84c8781484c87814, 0x84c8781484c87814, 0x84c8781484c87814, 0x84c8781484c87814,
-	0x8cc702088cc70208, 0x8cc702088cc70208, 0x8cc702088cc70208, 0x8cc702088cc70208,
-	0x8cc702088cc70208, 0x8cc702088cc70208, 0x8cc702088cc70208, 0x8cc702088cc70208,
-	0x90befffa90befffa, 0x90befffa90befffa, 0x90befffa90befffa, 0x90befffa90befffa,
-	0x90befffa90befffa, 0x90befffa90befffa, 0x90befffa90befffa, 0x90befffa90befffa,
-	0xa4506ceba4506ceb, 0xa4506ceba4506ceb, 0xa4506ceba4506ceb, 0xa4506ceba4506ceb,
-	0xa4506ceba4506ceb, 0xa4506ceba4506ceb, 0xa4506ceba4506ceb, 0xa4506ceba4506ceb,
-	0xbef9a3f7bef9a3f7, 0xbef9a3f7bef9a3f7, 0xbef9a3f7bef9a3f7, 0xbef9a3f7bef9a3f7,
-	0xbef9a3f7bef9a3f7, 0xbef9a3f7bef9a3f7, 0xbef9a3f7bef9a3f7, 0xbef9a3f7bef9a3f7,
-	0xc67178f2c67178f2, 0xc67178f2c67178f2, 0xc67178f2c67178f2, 0xc67178f2c67178f2,
-	0xc67178f2c67178f2, 0xc67178f2c67178f2, 0xc67178f2c67178f2, 0xc67178f2c67178f2}
-
-// Interface function to assembly ode
-func blockAvx512(digests *[512]byte, input [16][]byte, mask []uint64) [16][Size]byte {
-
-	scratch := [512]byte{}
-	sha256X16Avx512(digests, &scratch, &table, mask, input)
-
-	output := [16][Size]byte{}
-	for i := 0; i < 16; i++ {
-		output[i] = getDigest(i, digests[:])
-	}
-
-	return output
-}
-
-func getDigest(index int, state []byte) (sum [Size]byte) {
-	for j := 0; j < 16; j += 2 {
-		for i := index*4 + j*Size; i < index*4+(j+1)*Size; i += Size {
-			binary.BigEndian.PutUint32(sum[j*2:], binary.LittleEndian.Uint32(state[i:i+4]))
-		}
-	}
-	return
-}
-
-// Message to send across input channel
-type blockInput struct {
-	uid   uint64
-	msg   []byte
-	reset bool
-	final bool
-	sumCh chan [Size]byte
-}
-
-// Avx512Server - Type to implement 16x parallel handling of SHA256 invocations
-type Avx512Server struct {
-	blocksCh chan blockInput       // Input channel
-	totalIn  int                   // Total number of inputs waiting to be processed
-	lanes    [16]Avx512LaneInfo    // Array with info per lane (out of 16)
-	digests  map[uint64][Size]byte // Map of uids to (interim) digest results
-}
-
-// Avx512LaneInfo - Info for each lane
-type Avx512LaneInfo struct {
-	uid      uint64          // unique identification for this SHA processing
-	block    []byte          // input block to be processed
-	outputCh chan [Size]byte // channel for output result
-}
-
-// NewAvx512Server - Create new object for parallel processing handling
-func NewAvx512Server() *Avx512Server {
-	a512srv := &Avx512Server{}
-	a512srv.digests = make(map[uint64][Size]byte)
-	a512srv.blocksCh = make(chan blockInput)
-
-	// Start a single thread for reading from the input channel
-	go a512srv.Process()
-	return a512srv
-}
-
-// Process - Sole handler for reading from the input channel
-func (a512srv *Avx512Server) Process() {
-	for {
-		select {
-		case block := <-a512srv.blocksCh:
-			if block.reset {
-				a512srv.reset(block.uid)
-				continue
-			}
-			index := block.uid & 0xf
-			// fmt.Println("Adding message:", block.uid, index)
-
-			if a512srv.lanes[index].block != nil { // If slot is already filled, process all inputs
-				//fmt.Println("Invoking Blocks()")
-				a512srv.blocks()
-			}
-			a512srv.totalIn++
-			a512srv.lanes[index] = Avx512LaneInfo{uid: block.uid, block: block.msg}
-			if block.final {
-				a512srv.lanes[index].outputCh = block.sumCh
-			}
-			if a512srv.totalIn == len(a512srv.lanes) {
-				// fmt.Println("Invoking Blocks() while FULL: ")
-				a512srv.blocks()
-			}
-
-			// TODO: test with larger timeout
-		case <-time.After(1 * time.Microsecond):
-			for _, lane := range a512srv.lanes {
-				if lane.block != nil { // check if there is any input to process
-					// fmt.Println("Invoking Blocks() on TIMEOUT: ")
-					a512srv.blocks()
-					break // we are done
-				}
-			}
-		}
-	}
-}
-
-// Do a reset for this calculation
-func (a512srv *Avx512Server) reset(uid uint64) {
-
-	// Check if there is a message still waiting to be processed (and remove if so)
-	for i, lane := range a512srv.lanes {
-		if lane.uid == uid {
-			if lane.block != nil {
-				a512srv.lanes[i] = Avx512LaneInfo{} // clear message
-				a512srv.totalIn--
-			}
-		}
-	}
-
-	// Delete entry from hash map
-	delete(a512srv.digests, uid)
-}
-
-// Invoke assembly and send results back
-func (a512srv *Avx512Server) blocks() {
-
-	inputs := [16][]byte{}
-	for i := range inputs {
-		inputs[i] = a512srv.lanes[i].block
-	}
-
-	mask := expandMask(genMask(inputs))
-	outputs := blockAvx512(a512srv.getDigests(), inputs, mask)
-
-	a512srv.totalIn = 0
-	for i := 0; i < len(outputs); i++ {
-		uid, outputCh := a512srv.lanes[i].uid, a512srv.lanes[i].outputCh
-		a512srv.digests[uid] = outputs[i]
-		a512srv.lanes[i] = Avx512LaneInfo{}
-
-		if outputCh != nil {
-			// Send back result
-			outputCh <- outputs[i]
-			delete(a512srv.digests, uid) // Delete entry from hashmap
-		}
-	}
-}
-
-func (a512srv *Avx512Server) Write(uid uint64, p []byte) (nn int, err error) {
-	a512srv.blocksCh <- blockInput{uid: uid, msg: p}
-	return len(p), nil
-}
-
-// Sum - return sha256 sum in bytes for a given sum id.
-func (a512srv *Avx512Server) Sum(uid uint64, p []byte) [32]byte {
-	sumCh := make(chan [32]byte)
-	a512srv.blocksCh <- blockInput{uid: uid, msg: p, final: true, sumCh: sumCh}
-	return <-sumCh
-}
-
-func (a512srv *Avx512Server) getDigests() *[512]byte {
-	digests := [512]byte{}
-	for i, lane := range a512srv.lanes {
-		a, ok := a512srv.digests[lane.uid]
-		if ok {
-			binary.BigEndian.PutUint32(digests[(i+0*16)*4:], binary.LittleEndian.Uint32(a[0:4]))
-			binary.BigEndian.PutUint32(digests[(i+1*16)*4:], binary.LittleEndian.Uint32(a[4:8]))
-			binary.BigEndian.PutUint32(digests[(i+2*16)*4:], binary.LittleEndian.Uint32(a[8:12]))
-			binary.BigEndian.PutUint32(digests[(i+3*16)*4:], binary.LittleEndian.Uint32(a[12:16]))
-			binary.BigEndian.PutUint32(digests[(i+4*16)*4:], binary.LittleEndian.Uint32(a[16:20]))
-			binary.BigEndian.PutUint32(digests[(i+5*16)*4:], binary.LittleEndian.Uint32(a[20:24]))
-			binary.BigEndian.PutUint32(digests[(i+6*16)*4:], binary.LittleEndian.Uint32(a[24:28]))
-			binary.BigEndian.PutUint32(digests[(i+7*16)*4:], binary.LittleEndian.Uint32(a[28:32]))
-		} else {
-			binary.LittleEndian.PutUint32(digests[(i+0*16)*4:], init0)
-			binary.LittleEndian.PutUint32(digests[(i+1*16)*4:], init1)
-			binary.LittleEndian.PutUint32(digests[(i+2*16)*4:], init2)
-			binary.LittleEndian.PutUint32(digests[(i+3*16)*4:], init3)
-			binary.LittleEndian.PutUint32(digests[(i+4*16)*4:], init4)
-			binary.LittleEndian.PutUint32(digests[(i+5*16)*4:], init5)
-			binary.LittleEndian.PutUint32(digests[(i+6*16)*4:], init6)
-			binary.LittleEndian.PutUint32(digests[(i+7*16)*4:], init7)
-		}
-	}
-	return &digests
-}
-
-// Helper struct for sorting blocks based on length
-type lane struct {
-	len uint
-	pos uint
-}
-
-type lanes []lane
-
-func (lns lanes) Len() int           { return len(lns) }
-func (lns lanes) Swap(i, j int)      { lns[i], lns[j] = lns[j], lns[i] }
-func (lns lanes) Less(i, j int) bool { return lns[i].len < lns[j].len }
-
-// Helper struct for
-type maskRounds struct {
-	mask   uint64
-	rounds uint64
-}
-
-func genMask(input [16][]byte) [16]maskRounds {
-
-	// Sort on blocks length small to large
-	var sorted [16]lane
-	for c, inpt := range input {
-		sorted[c] = lane{uint(len(inpt)), uint(c)}
-	}
-	sort.Sort(lanes(sorted[:]))
-
-	// Create mask array including 'rounds' between masks
-	m, round, index := uint64(0xffff), uint64(0), 0
-	var mr [16]maskRounds
-	for _, s := range sorted {
-		if s.len > 0 {
-			if uint64(s.len)>>6 > round {
-				mr[index] = maskRounds{m, (uint64(s.len) >> 6) - round}
-				index++
-			}
-			round = uint64(s.len) >> 6
-		}
-		m = m & ^(1 << uint(s.pos))
-	}
-
-	return mr
-}
-
-// TODO: remove function
-func expandMask(mr [16]maskRounds) []uint64 {
-	size := uint64(0)
-	for _, r := range mr {
-		size += r.rounds
-	}
-	result, index := make([]uint64, size), 0
-	for _, r := range mr {
-		for j := uint64(0); j < r.rounds; j++ {
-			result[index] = r.mask
-			index++
-		}
-	}
-	return result
-}
diff --git a/vendor/github.com/minio/sha256-simd/sha256blockAvx512_amd64.s b/vendor/github.com/minio/sha256-simd/sha256blockAvx512_amd64.s
deleted file mode 100644
index cca534e4..00000000
--- a/vendor/github.com/minio/sha256-simd/sha256blockAvx512_amd64.s
+++ /dev/null
@@ -1,267 +0,0 @@
-//+build !noasm,!appengine,gc
-
-TEXT ·sha256X16Avx512(SB), 7, $0
-	MOVQ  digests+0(FP), DI
-	MOVQ  scratch+8(FP), R12
-	MOVQ  mask_len+32(FP), SI
-	MOVQ  mask_base+24(FP), R13
-	MOVQ  (R13), R14
-	LONG  $0x92fbc1c4; BYTE $0xce
-	LEAQ  inputs+48(FP), AX
-	QUAD  $0xf162076f487ef162; QUAD $0x7ef162014f6f487e; QUAD $0x487ef16202576f48; QUAD $0x6f487ef162035f6f; QUAD $0x6f6f487ef1620467; QUAD $0x06776f487ef16205; LONG $0x487ef162; WORD $0x7f6f; BYTE $0x07
-	MOVQ  table+16(FP), DX
-	WORD  $0x3148; BYTE $0xc9
-	TESTQ $(1<<0), R14
-	JE    skipInput0
-	MOVQ  0*24(AX), R9
-	LONG  $0x487cc162; WORD $0x0410; BYTE $0x09
-
-skipInput0:
-	TESTQ $(1<<1), R14
-	JE    skipInput1
-	MOVQ  1*24(AX), R9
-	LONG  $0x487cc162; WORD $0x0c10; BYTE $0x09
-
-skipInput1:
-	TESTQ $(1<<2), R14
-	JE    skipInput2
-	MOVQ  2*24(AX), R9
-	LONG  $0x487cc162; WORD $0x1410; BYTE $0x09
-
-skipInput2:
-	TESTQ $(1<<3), R14
-	JE    skipInput3
-	MOVQ  3*24(AX), R9
-	LONG  $0x487cc162; WORD $0x1c10; BYTE $0x09
-
-skipInput3:
-	TESTQ $(1<<4), R14
-	JE    skipInput4
-	MOVQ  4*24(AX), R9
-	LONG  $0x487cc162; WORD $0x2410; BYTE $0x09
-
-skipInput4:
-	TESTQ $(1<<5), R14
-	JE    skipInput5
-	MOVQ  5*24(AX), R9
-	LONG  $0x487cc162; WORD $0x2c10; BYTE $0x09
-
-skipInput5:
-	TESTQ $(1<<6), R14
-	JE    skipInput6
-	MOVQ  6*24(AX), R9
-	LONG  $0x487cc162; WORD $0x3410; BYTE $0x09
-
-skipInput6:
-	TESTQ $(1<<7), R14
-	JE    skipInput7
-	MOVQ  7*24(AX), R9
-	LONG  $0x487cc162; WORD $0x3c10; BYTE $0x09
-
-skipInput7:
-	TESTQ $(1<<8), R14
-	JE    skipInput8
-	MOVQ  8*24(AX), R9
-	LONG  $0x487c4162; WORD $0x0410; BYTE $0x09
-
-skipInput8:
-	TESTQ $(1<<9), R14
-	JE    skipInput9
-	MOVQ  9*24(AX), R9
-	LONG  $0x487c4162; WORD $0x0c10; BYTE $0x09
-
-skipInput9:
-	TESTQ $(1<<10), R14
-	JE    skipInput10
-	MOVQ  10*24(AX), R9
-	LONG  $0x487c4162; WORD $0x1410; BYTE $0x09
-
-skipInput10:
-	TESTQ $(1<<11), R14
-	JE    skipInput11
-	MOVQ  11*24(AX), R9
-	LONG  $0x487c4162; WORD $0x1c10; BYTE $0x09
-
-skipInput11:
-	TESTQ $(1<<12), R14
-	JE    skipInput12
-	MOVQ  12*24(AX), R9
-	LONG  $0x487c4162; WORD $0x2410; BYTE $0x09
-
-skipInput12:
-	TESTQ $(1<<13), R14
-	JE    skipInput13
-	MOVQ  13*24(AX), R9
-	LONG  $0x487c4162; WORD $0x2c10; BYTE $0x09
-
-skipInput13:
-	TESTQ $(1<<14), R14
-	JE    skipInput14
-	MOVQ  14*24(AX), R9
-	LONG  $0x487c4162; WORD $0x3410; BYTE $0x09
-
-skipInput14:
-	TESTQ $(1<<15), R14
-	JE    skipInput15
-	MOVQ  15*24(AX), R9
-	LONG  $0x487c4162; WORD $0x3c10; BYTE $0x09
-
-skipInput15:
-lloop:
-	LEAQ                 PSHUFFLE_BYTE_FLIP_MASK<>(SB), DX
-	LONG                 $0x487e7162; WORD $0x1a6f
-	MOVQ                 table+16(FP), DX
-	QUAD                 $0xd162226f487e7162; QUAD $0x7ed16224047f487e; QUAD $0x7ed16201244c7f48; QUAD $0x7ed1620224547f48; QUAD $0x7ed16203245c7f48; QUAD $0x7ed1620424647f48; QUAD $0x7ed16205246c7f48; QUAD $0x7ed1620624747f48; QUAD $0xc1834807247c7f48; QUAD $0x44c9c6407c316240; QUAD $0x62eec1c6407ca162; QUAD $0xa16244d3c6406c31; QUAD $0x34c162eed3c6406c; QUAD $0x407ca162dddac648; QUAD $0xc6407ca16288cac6; QUAD $0xcac648345162ddc2; QUAD $0x44d5c6405ca16288; QUAD $0x62eee5c6405ca162; QUAD $0xa16244d7c6404c31; QUAD $0x6cc162eef7c6404c; QUAD $0x405ca162ddfac640; QUAD $0xc6405ca16288eec6; QUAD $0xd2c6406cc162dde6; QUAD $0x44f1c6403c816288; QUAD $0x62eec1c6403c0162; QUAD $0x016244d3c6402c11; QUAD $0x4c4162eed3c6402c; QUAD $0x403c0162dddac640; QUAD $0xc6403c016288cac6; QUAD $0xf2c6404cc162ddc2; QUAD $0x44d5c6401c016288; QUAD $0x62eee5c6401c0162; QUAD $0x016244d7c6400c11; QUAD $0x2c4162eef7c6400c; QUAD $0x401c0162ddfac640; QUAD $0xc6401c016288eec6; QUAD $0xd2c6402c4162dde6; BYTE $0x88
-	LEAQ                 PSHUFFLE_TRANSPOSE16_MASK1<>(SB), BX
-	LEAQ                 PSHUFFLE_TRANSPOSE16_MASK2<>(SB), R8
-	QUAD                 $0x2262336f487e6162; QUAD $0x487e5162f27648b5; QUAD $0xd27648b53262106f; QUAD $0xa262136f487ee162; QUAD $0x487e5162d77640e5; QUAD $0xcf7640e53262086f; QUAD $0xa2621b6f487ee162; QUAD $0x487ec162dd7640f5; QUAD $0xfd7640f5a262386f; QUAD $0xa2620b6f487ee162; QUAD $0x487ec162cc7640fd; QUAD $0xec7640fda262286f; QUAD $0x8262036f487ee162; QUAD $0x487ec162c27640cd; QUAD $0xe27640cd8262206f; QUAD $0x8262336f487ee162; QUAD $0x487e4162f77640a5; QUAD $0xd77640a50262106f; QUAD $0x02621b6f487e6162; QUAD $0x487e4162dd7640b5; QUAD $0xfd7640b50262386f; QUAD $0x02620b6f487e6162; QUAD $0x487e4162cc7640bd; QUAD $0xec7640bd0262286f; QUAD $0x62eec023408d2362; QUAD $0x236244c023408da3; QUAD $0xada362eee42348ad; QUAD $0x40c5036244e42348; QUAD $0x2340c51362eef723; QUAD $0xfd2340d5036244d7; QUAD $0x44fd2340d58362ee; QUAD $0x62eeea2348b50362; QUAD $0x036244ea2348b583; QUAD $0xe51362eed32340e5; QUAD $0x40f5036244cb2340; QUAD $0x2340f58362eed923; QUAD $0xce2340ed236244d9; QUAD $0x44ce2340eda362ee; QUAD $0xc162d16f487ec162; QUAD $0x407dc262f26f487e; QUAD $0xcb004075c262c300; QUAD $0xc262d300406dc262; QUAD $0x405dc262db004065; QUAD $0xeb004055c262e300; QUAD $0xc262f300404dc262; QUAD $0x403d4262fb004045; QUAD $0xcb0040354262c300; QUAD $0x4262d300402d4262; QUAD $0x401d4262db004025; QUAD $0xeb0040154262e300; QUAD $0x4262f300400d4262; QUAD $0x48455162fb004005; QUAD $0xcc6f487e7162c4fe; QUAD $0x6206c472482df162; QUAD $0xf1620bc4724825f1; QUAD $0x55736219c472481d; QUAD $0x483d3162cace2548; QUAD $0xd42548255362c0fe; QUAD $0x62c1fe483d516296; QUAD $0x65d162c2fe483d51; QUAD $0x724845f162d8fe48; QUAD $0xc0724825f16202c0; QUAD $0x16c072481df1620d; QUAD $0x7362c86f487e7162; QUAD $0x25d362e8ca254875; QUAD $0x4845d16296fc2548; QUAD $0xf8fe4845d162f9fe; QUAD $0x6201626f487e7162; QUAD $0x916211c672481591; QUAD $0x05916213c672480d; QUAD $0x480d53620ad67248; QUAD $0xfe407dc16296ef25; QUAD $0x62c1fe407d8162c5; QUAD $0xb16207c1724815b1; QUAD $0x05b16212c172480d; QUAD $0x480d536203d17248; QUAD 
$0xfe407dc16296ef25; QUAD $0x62c4fe484d5162c5; QUAD $0x2df162cb6f487e71; QUAD $0x4825f16206c37248; QUAD $0x72481df1620bc372; QUAD $0xcd25485d736219c3; QUAD $0x62c1fe483d3162ca; QUAD $0x516296d425482553; QUAD $0x483d5162c1fe483d; QUAD $0xd0fe486dd162c2fe; QUAD $0x6202c772484df162; QUAD $0xf1620dc7724825f1; QUAD $0x7e716216c772481d; QUAD $0x25487d7362cf6f48; QUAD $0xf4254825d362e8c9; QUAD $0x62f1fe484dd16296; QUAD $0x7e7162f0fe484dd1; QUAD $0x4815916202626f48; QUAD $0x72480d916211c772; QUAD $0xd7724805916213c7; QUAD $0x96ef25480d53620a; QUAD $0x8162cdfe4075c162; QUAD $0x4815b162cafe4075; QUAD $0x72480db16207c272; QUAD $0xd2724805b16212c2; QUAD $0x96ef25480d536203; QUAD $0x5162cdfe4075c162; QUAD $0x487e7162c4fe4855; QUAD $0xc272482df162ca6f; QUAD $0x0bc2724825f16206; QUAD $0x6219c272481df162; QUAD $0x3162cacc25486573; QUAD $0x48255362c2fe483d; QUAD $0xfe483d516296d425; QUAD $0x62c2fe483d5162c1; QUAD $0x55f162c8fe4875d1; QUAD $0x4825f16202c67248; QUAD $0x72481df1620dc672; QUAD $0xce6f487e716216c6; QUAD $0x62e8c82548457362; QUAD $0xd16296ec254825d3; QUAD $0x4855d162e9fe4855; QUAD $0x626f487e7162e8fe; QUAD $0x11c0724815b16203; QUAD $0x6213c072480db162; QUAD $0x53620ad0724805b1; QUAD $0x6dc16296ef25480d; QUAD $0xfe406d8162d5fe40; QUAD $0x07c3724815b162d3; QUAD $0x6212c372480db162; QUAD $0x536203d3724805b1; QUAD $0x6dc16296ef25480d; QUAD $0xfe485d5162d5fe40; QUAD $0x62c96f487e7162c4; QUAD $0xf16206c172482df1; QUAD $0x1df1620bc1724825; QUAD $0x486d736219c17248; QUAD $0xfe483d3162cacb25; QUAD $0x96d42548255362c3; QUAD $0x5162c1fe483d5162; QUAD $0x487dd162c2fe483d; QUAD $0xc572485df162c0fe; QUAD $0x0dc5724825f16202; QUAD $0x6216c572481df162; QUAD $0x4d7362cd6f487e71; QUAD $0x4825d362e8cf2548; QUAD $0xfe485dd16296e425; QUAD $0x62e0fe485dd162e1; QUAD $0xb16204626f487e71; QUAD $0x0db16211c1724815; QUAD $0x4805b16213c17248; QUAD $0x25480d53620ad172; QUAD $0xddfe4065c16296ef; QUAD $0xb162dcfe40658162; QUAD $0x0db16207c4724815; QUAD $0x4805b16212c47248; QUAD $0x25480d536203d472; 
QUAD $0xddfe4065c16296ef; QUAD $0x7162c4fe48655162; QUAD $0x482df162c86f487e; QUAD $0x724825f16206c072; QUAD $0xc072481df1620bc0; QUAD $0xcaca254875736219; QUAD $0x5362c4fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f8fe4845d162c2; QUAD $0xf16202c4724865f1; QUAD $0x1df1620dc4724825; QUAD $0x487e716216c47248; QUAD $0xce2548557362cc6f; QUAD $0x96dc254825d362e8; QUAD $0xd162d9fe4865d162; QUAD $0x487e7162d8fe4865; QUAD $0x724815b16205626f; QUAD $0xc272480db16211c2; QUAD $0x0ad2724805b16213; QUAD $0x6296ef25480d5362; QUAD $0x5d8162e5fe405dc1; QUAD $0x724815b162e5fe40; QUAD $0xc572480db16207c5; QUAD $0x03d5724805b16212; QUAD $0x6296ef25480d5362; QUAD $0x6d5162e5fe405dc1; QUAD $0x6f487e7162c4fe48; QUAD $0x06c772482df162cf; QUAD $0x620bc7724825f162; QUAD $0x736219c772481df1; QUAD $0x3d3162cac925487d; QUAD $0x2548255362c5fe48; QUAD $0xc1fe483d516296d4; QUAD $0xd162c2fe483d5162; QUAD $0x486df162f0fe484d; QUAD $0x724825f16202c372; QUAD $0xc372481df1620dc3; QUAD $0x62cb6f487e716216; QUAD $0xd362e8cd25485d73; QUAD $0x6dd16296d4254825; QUAD $0xfe486dd162d1fe48; QUAD $0x06626f487e7162d0; QUAD $0x6211c3724815b162; QUAD $0xb16213c372480db1; QUAD $0x0d53620ad3724805; QUAD $0x4055c16296ef2548; QUAD $0xeefe40558162edfe; QUAD $0x6207c6724815b162; QUAD $0xb16212c672480db1; QUAD $0x0d536203d6724805; QUAD $0x4055c16296ef2548; QUAD $0xc4fe48755162edfe; QUAD $0xf162ce6f487e7162; QUAD $0x25f16206c672482d; QUAD $0x481df1620bc67248; QUAD $0x254845736219c672; QUAD $0xc6fe483d3162cac8; QUAD $0x6296d42548255362; QUAD $0x3d5162c1fe483d51; QUAD $0xfe4855d162c2fe48; QUAD $0x02c2724875f162e8; QUAD $0x620dc2724825f162; QUAD $0x716216c272481df1; QUAD $0x48657362ca6f487e; QUAD $0x254825d362e8cc25; QUAD $0xc9fe4875d16296cc; QUAD $0x7162c8fe4875d162; QUAD $0x15b16207626f487e; QUAD $0x480db16211c47248; QUAD $0x724805b16213c472; QUAD $0xef25480d53620ad4; QUAD $0x62f5fe404dc16296; QUAD $0x15b162f7fe404d81; QUAD $0x480db16207c77248; QUAD $0x724805b16212c772; QUAD 
$0xef25480d536203d7; QUAD $0x62f5fe404dc16296; QUAD $0x7e7162c4fe487d51; QUAD $0x72482df162cd6f48; QUAD $0xc5724825f16206c5; QUAD $0x19c572481df1620b; QUAD $0x62cacf25484d7362; QUAD $0x255362c7fe483d31; QUAD $0x483d516296d42548; QUAD $0xc2fe483d5162c1fe; QUAD $0xf162e0fe485dd162; QUAD $0x25f16202c172487d; QUAD $0x481df1620dc17248; QUAD $0x6f487e716216c172; QUAD $0xe8cb25486d7362c9; QUAD $0x6296c4254825d362; QUAD $0x7dd162c1fe487dd1; QUAD $0x6f487e7162c0fe48; QUAD $0xc5724815b1620862; QUAD $0x13c572480db16211; QUAD $0x620ad5724805b162; QUAD $0xc16296ef25480d53; QUAD $0x4045a162fdfe4045; QUAD $0xc07248159162f8fe; QUAD $0x12c072480d916207; QUAD $0x6203d07248059162; QUAD $0xc16296ef25480d53; QUAD $0x48455162fdfe4045; QUAD $0xcc6f487e7162c4fe; QUAD $0x6206c472482df162; QUAD $0xf1620bc4724825f1; QUAD $0x55736219c472481d; QUAD $0x483d1162cace2548; QUAD $0xd42548255362c0fe; QUAD $0x62c1fe483d516296; QUAD $0x65d162c2fe483d51; QUAD $0x724845f162d8fe48; QUAD $0xc0724825f16202c0; QUAD $0x16c072481df1620d; QUAD $0x7362c86f487e7162; QUAD $0x25d362e8ca254875; QUAD $0x4845d16296fc2548; QUAD $0xf8fe4845d162f9fe; QUAD $0x6209626f487e7162; QUAD $0xb16211c6724815b1; QUAD $0x05b16213c672480d; QUAD $0x480d53620ad67248; QUAD $0xfe403d416296ef25; QUAD $0x62c1fe403d2162c5; QUAD $0x916207c172481591; QUAD $0x05916212c172480d; QUAD $0x480d536203d17248; QUAD $0xfe403d416296ef25; QUAD $0x62c4fe484d5162c5; QUAD $0x2df162cb6f487e71; QUAD $0x4825f16206c37248; QUAD $0x72481df1620bc372; QUAD $0xcd25485d736219c3; QUAD $0x62c1fe483d1162ca; QUAD $0x516296d425482553; QUAD $0x483d5162c1fe483d; QUAD $0xd0fe486dd162c2fe; QUAD $0x6202c772484df162; QUAD $0xf1620dc7724825f1; QUAD $0x7e716216c772481d; QUAD $0x25487d7362cf6f48; QUAD $0xf4254825d362e8c9; QUAD $0x62f1fe484dd16296; QUAD $0x7e7162f0fe484dd1; QUAD $0x4815b1620a626f48; QUAD $0x72480db16211c772; QUAD $0xd7724805b16213c7; QUAD $0x96ef25480d53620a; QUAD $0x2162cdfe40354162; QUAD $0x48159162cafe4035; QUAD $0x72480d916207c272; QUAD $0xd2724805916212c2; 
QUAD $0x96ef25480d536203; QUAD $0x5162cdfe40354162; QUAD $0x487e7162c4fe4855; QUAD $0xc272482df162ca6f; QUAD $0x0bc2724825f16206; QUAD $0x6219c272481df162; QUAD $0x1162cacc25486573; QUAD $0x48255362c2fe483d; QUAD $0xfe483d516296d425; QUAD $0x62c2fe483d5162c1; QUAD $0x55f162c8fe4875d1; QUAD $0x4825f16202c67248; QUAD $0x72481df1620dc672; QUAD $0xce6f487e716216c6; QUAD $0x62e8c82548457362; QUAD $0xd16296ec254825d3; QUAD $0x4855d162e9fe4855; QUAD $0x626f487e7162e8fe; QUAD $0x11c072481591620b; QUAD $0x6213c072480d9162; QUAD $0x53620ad072480591; QUAD $0x2d416296ef25480d; QUAD $0xfe402d2162d5fe40; QUAD $0x07c37248159162d3; QUAD $0x6212c372480d9162; QUAD $0x536203d372480591; QUAD $0x2d416296ef25480d; QUAD $0xfe485d5162d5fe40; QUAD $0x62c96f487e7162c4; QUAD $0xf16206c172482df1; QUAD $0x1df1620bc1724825; QUAD $0x486d736219c17248; QUAD $0xfe483d1162cacb25; QUAD $0x96d42548255362c3; QUAD $0x5162c1fe483d5162; QUAD $0x487dd162c2fe483d; QUAD $0xc572485df162c0fe; QUAD $0x0dc5724825f16202; QUAD $0x6216c572481df162; QUAD $0x4d7362cd6f487e71; QUAD $0x4825d362e8cf2548; QUAD $0xfe485dd16296e425; QUAD $0x62e0fe485dd162e1; QUAD $0x91620c626f487e71; QUAD $0x0d916211c1724815; QUAD $0x4805916213c17248; QUAD $0x25480d53620ad172; QUAD $0xddfe4025416296ef; QUAD $0x9162dcfe40252162; QUAD $0x0d916207c4724815; QUAD $0x4805916212c47248; QUAD $0x25480d536203d472; QUAD $0xddfe4025416296ef; QUAD $0x7162c4fe48655162; QUAD $0x482df162c86f487e; QUAD $0x724825f16206c072; QUAD $0xc072481df1620bc0; QUAD $0xcaca254875736219; QUAD $0x5362c4fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f8fe4845d162c2; QUAD $0xf16202c4724865f1; QUAD $0x1df1620dc4724825; QUAD $0x487e716216c47248; QUAD $0xce2548557362cc6f; QUAD $0x96dc254825d362e8; QUAD $0xd162d9fe4865d162; QUAD $0x487e7162d8fe4865; QUAD $0x72481591620d626f; QUAD $0xc272480d916211c2; QUAD $0x0ad2724805916213; QUAD $0x6296ef25480d5362; QUAD $0x1d2162e5fe401d41; QUAD $0x7248159162e5fe40; QUAD $0xc572480d916207c5; QUAD 
$0x03d5724805916212; QUAD $0x6296ef25480d5362; QUAD $0x6d5162e5fe401d41; QUAD $0x6f487e7162c4fe48; QUAD $0x06c772482df162cf; QUAD $0x620bc7724825f162; QUAD $0x736219c772481df1; QUAD $0x3d1162cac925487d; QUAD $0x2548255362c5fe48; QUAD $0xc1fe483d516296d4; QUAD $0xd162c2fe483d5162; QUAD $0x486df162f0fe484d; QUAD $0x724825f16202c372; QUAD $0xc372481df1620dc3; QUAD $0x62cb6f487e716216; QUAD $0xd362e8cd25485d73; QUAD $0x6dd16296d4254825; QUAD $0xfe486dd162d1fe48; QUAD $0x0e626f487e7162d0; QUAD $0x6211c37248159162; QUAD $0x916213c372480d91; QUAD $0x0d53620ad3724805; QUAD $0x4015416296ef2548; QUAD $0xeefe40152162edfe; QUAD $0x6207c67248159162; QUAD $0x916212c672480d91; QUAD $0x0d536203d6724805; QUAD $0x4015416296ef2548; QUAD $0xc4fe48755162edfe; QUAD $0xf162ce6f487e7162; QUAD $0x25f16206c672482d; QUAD $0x481df1620bc67248; QUAD $0x254845736219c672; QUAD $0xc6fe483d1162cac8; QUAD $0x6296d42548255362; QUAD $0x3d5162c1fe483d51; QUAD $0xfe4855d162c2fe48; QUAD $0x02c2724875f162e8; QUAD $0x620dc2724825f162; QUAD $0x716216c272481df1; QUAD $0x48657362ca6f487e; QUAD $0x254825d362e8cc25; QUAD $0xc9fe4875d16296cc; QUAD $0x7162c8fe4875d162; QUAD $0x1591620f626f487e; QUAD $0x480d916211c47248; QUAD $0x724805916213c472; QUAD $0xef25480d53620ad4; QUAD $0x62f5fe400d416296; QUAD $0x159162f7fe400d21; QUAD $0x480d916207c77248; QUAD $0x724805916212c772; QUAD $0xef25480d536203d7; QUAD $0x62f5fe400d416296; QUAD $0x7e7162c4fe487d51; QUAD $0x72482df162cd6f48; QUAD $0xc5724825f16206c5; QUAD $0x19c572481df1620b; QUAD $0x62cacf25484d7362; QUAD $0x255362c7fe483d11; QUAD $0x483d516296d42548; QUAD $0xc2fe483d5162c1fe; QUAD $0xf162e0fe485dd162; QUAD $0x25f16202c172487d; QUAD $0x481df1620dc17248; QUAD $0x6f487e716216c172; QUAD $0xe8cb25486d7362c9; QUAD $0x6296c4254825d362; QUAD $0x7dd162c1fe487dd1; QUAD $0x6f487e7162c0fe48; QUAD $0xc572481591621062; QUAD $0x13c572480d916211; QUAD $0x620ad57248059162; QUAD $0x416296ef25480d53; QUAD $0x40050162fdfe4005; QUAD $0xc0724815b162f8fe; QUAD $0x12c072480db16207; 
QUAD $0x6203d0724805b162; QUAD $0x416296ef25480d53; QUAD $0x48455162fdfe4005; QUAD $0xcc6f487e7162c4fe; QUAD $0x6206c472482df162; QUAD $0xf1620bc4724825f1; QUAD $0x55736219c472481d; QUAD $0x483d3162cace2548; QUAD $0xd42548255362c0fe; QUAD $0x62c1fe483d516296; QUAD $0x65d162c2fe483d51; QUAD $0x724845f162d8fe48; QUAD $0xc0724825f16202c0; QUAD $0x16c072481df1620d; QUAD $0x7362c86f487e7162; QUAD $0x25d362e8ca254875; QUAD $0x4845d16296fc2548; QUAD $0xf8fe4845d162f9fe; QUAD $0x6211626f487e7162; QUAD $0x916211c672481591; QUAD $0x05916213c672480d; QUAD $0x480d53620ad67248; QUAD $0xfe407dc16296ef25; QUAD $0x62c1fe407d8162c5; QUAD $0xb16207c1724815b1; QUAD $0x05b16212c172480d; QUAD $0x480d536203d17248; QUAD $0xfe407dc16296ef25; QUAD $0x62c4fe484d5162c5; QUAD $0x2df162cb6f487e71; QUAD $0x4825f16206c37248; QUAD $0x72481df1620bc372; QUAD $0xcd25485d736219c3; QUAD $0x62c1fe483d3162ca; QUAD $0x516296d425482553; QUAD $0x483d5162c1fe483d; QUAD $0xd0fe486dd162c2fe; QUAD $0x6202c772484df162; QUAD $0xf1620dc7724825f1; QUAD $0x7e716216c772481d; QUAD $0x25487d7362cf6f48; QUAD $0xf4254825d362e8c9; QUAD $0x62f1fe484dd16296; QUAD $0x7e7162f0fe484dd1; QUAD $0x4815916212626f48; QUAD $0x72480d916211c772; QUAD $0xd7724805916213c7; QUAD $0x96ef25480d53620a; QUAD $0x8162cdfe4075c162; QUAD $0x4815b162cafe4075; QUAD $0x72480db16207c272; QUAD $0xd2724805b16212c2; QUAD $0x96ef25480d536203; QUAD $0x5162cdfe4075c162; QUAD $0x487e7162c4fe4855; QUAD $0xc272482df162ca6f; QUAD $0x0bc2724825f16206; QUAD $0x6219c272481df162; QUAD $0x3162cacc25486573; QUAD $0x48255362c2fe483d; QUAD $0xfe483d516296d425; QUAD $0x62c2fe483d5162c1; QUAD $0x55f162c8fe4875d1; QUAD $0x4825f16202c67248; QUAD $0x72481df1620dc672; QUAD $0xce6f487e716216c6; QUAD $0x62e8c82548457362; QUAD $0xd16296ec254825d3; QUAD $0x4855d162e9fe4855; QUAD $0x626f487e7162e8fe; QUAD $0x11c0724815b16213; QUAD $0x6213c072480db162; QUAD $0x53620ad0724805b1; QUAD $0x6dc16296ef25480d; QUAD $0xfe406d8162d5fe40; QUAD $0x07c3724815b162d3; QUAD 
$0x6212c372480db162; QUAD $0x536203d3724805b1; QUAD $0x6dc16296ef25480d; QUAD $0xfe485d5162d5fe40; QUAD $0x62c96f487e7162c4; QUAD $0xf16206c172482df1; QUAD $0x1df1620bc1724825; QUAD $0x486d736219c17248; QUAD $0xfe483d3162cacb25; QUAD $0x96d42548255362c3; QUAD $0x5162c1fe483d5162; QUAD $0x487dd162c2fe483d; QUAD $0xc572485df162c0fe; QUAD $0x0dc5724825f16202; QUAD $0x6216c572481df162; QUAD $0x4d7362cd6f487e71; QUAD $0x4825d362e8cf2548; QUAD $0xfe485dd16296e425; QUAD $0x62e0fe485dd162e1; QUAD $0xb16214626f487e71; QUAD $0x0db16211c1724815; QUAD $0x4805b16213c17248; QUAD $0x25480d53620ad172; QUAD $0xddfe4065c16296ef; QUAD $0xb162dcfe40658162; QUAD $0x0db16207c4724815; QUAD $0x4805b16212c47248; QUAD $0x25480d536203d472; QUAD $0xddfe4065c16296ef; QUAD $0x7162c4fe48655162; QUAD $0x482df162c86f487e; QUAD $0x724825f16206c072; QUAD $0xc072481df1620bc0; QUAD $0xcaca254875736219; QUAD $0x5362c4fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f8fe4845d162c2; QUAD $0xf16202c4724865f1; QUAD $0x1df1620dc4724825; QUAD $0x487e716216c47248; QUAD $0xce2548557362cc6f; QUAD $0x96dc254825d362e8; QUAD $0xd162d9fe4865d162; QUAD $0x487e7162d8fe4865; QUAD $0x724815b16215626f; QUAD $0xc272480db16211c2; QUAD $0x0ad2724805b16213; QUAD $0x6296ef25480d5362; QUAD $0x5d8162e5fe405dc1; QUAD $0x724815b162e5fe40; QUAD $0xc572480db16207c5; QUAD $0x03d5724805b16212; QUAD $0x6296ef25480d5362; QUAD $0x6d5162e5fe405dc1; QUAD $0x6f487e7162c4fe48; QUAD $0x06c772482df162cf; QUAD $0x620bc7724825f162; QUAD $0x736219c772481df1; QUAD $0x3d3162cac925487d; QUAD $0x2548255362c5fe48; QUAD $0xc1fe483d516296d4; QUAD $0xd162c2fe483d5162; QUAD $0x486df162f0fe484d; QUAD $0x724825f16202c372; QUAD $0xc372481df1620dc3; QUAD $0x62cb6f487e716216; QUAD $0xd362e8cd25485d73; QUAD $0x6dd16296d4254825; QUAD $0xfe486dd162d1fe48; QUAD $0x16626f487e7162d0; QUAD $0x6211c3724815b162; QUAD $0xb16213c372480db1; QUAD $0x0d53620ad3724805; QUAD $0x4055c16296ef2548; QUAD $0xeefe40558162edfe; QUAD $0x6207c6724815b162; 
QUAD $0xb16212c672480db1; QUAD $0x0d536203d6724805; QUAD $0x4055c16296ef2548; QUAD $0xc4fe48755162edfe; QUAD $0xf162ce6f487e7162; QUAD $0x25f16206c672482d; QUAD $0x481df1620bc67248; QUAD $0x254845736219c672; QUAD $0xc6fe483d3162cac8; QUAD $0x6296d42548255362; QUAD $0x3d5162c1fe483d51; QUAD $0xfe4855d162c2fe48; QUAD $0x02c2724875f162e8; QUAD $0x620dc2724825f162; QUAD $0x716216c272481df1; QUAD $0x48657362ca6f487e; QUAD $0x254825d362e8cc25; QUAD $0xc9fe4875d16296cc; QUAD $0x7162c8fe4875d162; QUAD $0x15b16217626f487e; QUAD $0x480db16211c47248; QUAD $0x724805b16213c472; QUAD $0xef25480d53620ad4; QUAD $0x62f5fe404dc16296; QUAD $0x15b162f7fe404d81; QUAD $0x480db16207c77248; QUAD $0x724805b16212c772; QUAD $0xef25480d536203d7; QUAD $0x62f5fe404dc16296; QUAD $0x7e7162c4fe487d51; QUAD $0x72482df162cd6f48; QUAD $0xc5724825f16206c5; QUAD $0x19c572481df1620b; QUAD $0x62cacf25484d7362; QUAD $0x255362c7fe483d31; QUAD $0x483d516296d42548; QUAD $0xc2fe483d5162c1fe; QUAD $0xf162e0fe485dd162; QUAD $0x25f16202c172487d; QUAD $0x481df1620dc17248; QUAD $0x6f487e716216c172; QUAD $0xe8cb25486d7362c9; QUAD $0x6296c4254825d362; QUAD $0x7dd162c1fe487dd1; QUAD $0x6f487e7162c0fe48; QUAD $0xc5724815b1621862; QUAD $0x13c572480db16211; QUAD $0x620ad5724805b162; QUAD $0xc16296ef25480d53; QUAD $0x4045a162fdfe4045; QUAD $0xc07248159162f8fe; QUAD $0x12c072480d916207; QUAD $0x6203d07248059162; QUAD $0xc16296ef25480d53; QUAD $0x48455162fdfe4045; QUAD $0xcc6f487e7162c4fe; QUAD $0x6206c472482df162; QUAD $0xf1620bc4724825f1; QUAD $0x55736219c472481d; QUAD $0x483d1162cace2548; QUAD $0xd42548255362c0fe; QUAD $0x62c1fe483d516296; QUAD $0x65d162c2fe483d51; QUAD $0x724845f162d8fe48; QUAD $0xc0724825f16202c0; QUAD $0x16c072481df1620d; QUAD $0x7362c86f487e7162; QUAD $0x25d362e8ca254875; QUAD $0x4845d16296fc2548; QUAD $0xf8fe4845d162f9fe; QUAD $0x6219626f487e7162; QUAD $0xb16211c6724815b1; QUAD $0x05b16213c672480d; QUAD $0x480d53620ad67248; QUAD $0xfe403d416296ef25; QUAD $0x62c1fe403d2162c5; QUAD 
$0x916207c172481591; QUAD $0x05916212c172480d; QUAD $0x480d536203d17248; QUAD $0xfe403d416296ef25; QUAD $0x62c4fe484d5162c5; QUAD $0x2df162cb6f487e71; QUAD $0x4825f16206c37248; QUAD $0x72481df1620bc372; QUAD $0xcd25485d736219c3; QUAD $0x62c1fe483d1162ca; QUAD $0x516296d425482553; QUAD $0x483d5162c1fe483d; QUAD $0xd0fe486dd162c2fe; QUAD $0x6202c772484df162; QUAD $0xf1620dc7724825f1; QUAD $0x7e716216c772481d; QUAD $0x25487d7362cf6f48; QUAD $0xf4254825d362e8c9; QUAD $0x62f1fe484dd16296; QUAD $0x7e7162f0fe484dd1; QUAD $0x4815b1621a626f48; QUAD $0x72480db16211c772; QUAD $0xd7724805b16213c7; QUAD $0x96ef25480d53620a; QUAD $0x2162cdfe40354162; QUAD $0x48159162cafe4035; QUAD $0x72480d916207c272; QUAD $0xd2724805916212c2; QUAD $0x96ef25480d536203; QUAD $0x5162cdfe40354162; QUAD $0x487e7162c4fe4855; QUAD $0xc272482df162ca6f; QUAD $0x0bc2724825f16206; QUAD $0x6219c272481df162; QUAD $0x1162cacc25486573; QUAD $0x48255362c2fe483d; QUAD $0xfe483d516296d425; QUAD $0x62c2fe483d5162c1; QUAD $0x55f162c8fe4875d1; QUAD $0x4825f16202c67248; QUAD $0x72481df1620dc672; QUAD $0xce6f487e716216c6; QUAD $0x62e8c82548457362; QUAD $0xd16296ec254825d3; QUAD $0x4855d162e9fe4855; QUAD $0x626f487e7162e8fe; QUAD $0x11c072481591621b; QUAD $0x6213c072480d9162; QUAD $0x53620ad072480591; QUAD $0x2d416296ef25480d; QUAD $0xfe402d2162d5fe40; QUAD $0x07c37248159162d3; QUAD $0x6212c372480d9162; QUAD $0x536203d372480591; QUAD $0x2d416296ef25480d; QUAD $0xfe485d5162d5fe40; QUAD $0x62c96f487e7162c4; QUAD $0xf16206c172482df1; QUAD $0x1df1620bc1724825; QUAD $0x486d736219c17248; QUAD $0xfe483d1162cacb25; QUAD $0x96d42548255362c3; QUAD $0x5162c1fe483d5162; QUAD $0x487dd162c2fe483d; QUAD $0xc572485df162c0fe; QUAD $0x0dc5724825f16202; QUAD $0x6216c572481df162; QUAD $0x4d7362cd6f487e71; QUAD $0x4825d362e8cf2548; QUAD $0xfe485dd16296e425; QUAD $0x62e0fe485dd162e1; QUAD $0x91621c626f487e71; QUAD $0x0d916211c1724815; QUAD $0x4805916213c17248; QUAD $0x25480d53620ad172; QUAD $0xddfe4025416296ef; QUAD $0x9162dcfe40252162; 
QUAD $0x0d916207c4724815; QUAD $0x4805916212c47248; QUAD $0x25480d536203d472; QUAD $0xddfe4025416296ef; QUAD $0x7162c4fe48655162; QUAD $0x482df162c86f487e; QUAD $0x724825f16206c072; QUAD $0xc072481df1620bc0; QUAD $0xcaca254875736219; QUAD $0x5362c4fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f8fe4845d162c2; QUAD $0xf16202c4724865f1; QUAD $0x1df1620dc4724825; QUAD $0x487e716216c47248; QUAD $0xce2548557362cc6f; QUAD $0x96dc254825d362e8; QUAD $0xd162d9fe4865d162; QUAD $0x487e7162d8fe4865; QUAD $0x72481591621d626f; QUAD $0xc272480d916211c2; QUAD $0x0ad2724805916213; QUAD $0x6296ef25480d5362; QUAD $0x1d2162e5fe401d41; QUAD $0x7248159162e5fe40; QUAD $0xc572480d916207c5; QUAD $0x03d5724805916212; QUAD $0x6296ef25480d5362; QUAD $0x6d5162e5fe401d41; QUAD $0x6f487e7162c4fe48; QUAD $0x06c772482df162cf; QUAD $0x620bc7724825f162; QUAD $0x736219c772481df1; QUAD $0x3d1162cac925487d; QUAD $0x2548255362c5fe48; QUAD $0xc1fe483d516296d4; QUAD $0xd162c2fe483d5162; QUAD $0x486df162f0fe484d; QUAD $0x724825f16202c372; QUAD $0xc372481df1620dc3; QUAD $0x62cb6f487e716216; QUAD $0xd362e8cd25485d73; QUAD $0x6dd16296d4254825; QUAD $0xfe486dd162d1fe48; QUAD $0x1e626f487e7162d0; QUAD $0x6211c37248159162; QUAD $0x916213c372480d91; QUAD $0x0d53620ad3724805; QUAD $0x4015416296ef2548; QUAD $0xeefe40152162edfe; QUAD $0x6207c67248159162; QUAD $0x916212c672480d91; QUAD $0x0d536203d6724805; QUAD $0x4015416296ef2548; QUAD $0xc4fe48755162edfe; QUAD $0xf162ce6f487e7162; QUAD $0x25f16206c672482d; QUAD $0x481df1620bc67248; QUAD $0x254845736219c672; QUAD $0xc6fe483d1162cac8; QUAD $0x6296d42548255362; QUAD $0x3d5162c1fe483d51; QUAD $0xfe4855d162c2fe48; QUAD $0x02c2724875f162e8; QUAD $0x620dc2724825f162; QUAD $0x716216c272481df1; QUAD $0x48657362ca6f487e; QUAD $0x254825d362e8cc25; QUAD $0xc9fe4875d16296cc; QUAD $0x7162c8fe4875d162; QUAD $0x1591621f626f487e; QUAD $0x480d916211c47248; QUAD $0x724805916213c472; QUAD $0xef25480d53620ad4; QUAD $0x62f5fe400d416296; QUAD 
$0x159162f7fe400d21; QUAD $0x480d916207c77248; QUAD $0x724805916212c772; QUAD $0xef25480d536203d7; QUAD $0x62f5fe400d416296; QUAD $0x7e7162c4fe487d51; QUAD $0x72482df162cd6f48; QUAD $0xc5724825f16206c5; QUAD $0x19c572481df1620b; QUAD $0x62cacf25484d7362; QUAD $0x255362c7fe483d11; QUAD $0x483d516296d42548; QUAD $0xc2fe483d5162c1fe; QUAD $0xf162e0fe485dd162; QUAD $0x25f16202c172487d; QUAD $0x481df1620dc17248; QUAD $0x6f487e716216c172; QUAD $0xe8cb25486d7362c9; QUAD $0x6296c4254825d362; QUAD $0x7dd162c1fe487dd1; QUAD $0x6f487e7162c0fe48; QUAD $0xc572481591622062; QUAD $0x13c572480d916211; QUAD $0x620ad57248059162; QUAD $0x416296ef25480d53; QUAD $0x40050162fdfe4005; QUAD $0xc0724815b162f8fe; QUAD $0x12c072480db16207; QUAD $0x6203d0724805b162; QUAD $0x416296ef25480d53; QUAD $0x48455162fdfe4005; QUAD $0xcc6f487e7162c4fe; QUAD $0x6206c472482df162; QUAD $0xf1620bc4724825f1; QUAD $0x55736219c472481d; QUAD $0x483d3162cace2548; QUAD $0xd42548255362c0fe; QUAD $0x62c1fe483d516296; QUAD $0x65d162c2fe483d51; QUAD $0x724845f162d8fe48; QUAD $0xc0724825f16202c0; QUAD $0x16c072481df1620d; QUAD $0x7362c86f487e7162; QUAD $0x25d362e8ca254875; QUAD $0x4845d16296fc2548; QUAD $0xf8fe4845d162f9fe; QUAD $0x6221626f487e7162; QUAD $0x916211c672481591; QUAD $0x05916213c672480d; QUAD $0x480d53620ad67248; QUAD $0xfe407dc16296ef25; QUAD $0x62c1fe407d8162c5; QUAD $0xb16207c1724815b1; QUAD $0x05b16212c172480d; QUAD $0x480d536203d17248; QUAD $0xfe407dc16296ef25; QUAD $0x62c4fe484d5162c5; QUAD $0x2df162cb6f487e71; QUAD $0x4825f16206c37248; QUAD $0x72481df1620bc372; QUAD $0xcd25485d736219c3; QUAD $0x62c1fe483d3162ca; QUAD $0x516296d425482553; QUAD $0x483d5162c1fe483d; QUAD $0xd0fe486dd162c2fe; QUAD $0x6202c772484df162; QUAD $0xf1620dc7724825f1; QUAD $0x7e716216c772481d; QUAD $0x25487d7362cf6f48; QUAD $0xf4254825d362e8c9; QUAD $0x62f1fe484dd16296; QUAD $0x7e7162f0fe484dd1; QUAD $0x4815916222626f48; QUAD $0x72480d916211c772; QUAD $0xd7724805916213c7; QUAD $0x96ef25480d53620a; QUAD $0x8162cdfe4075c162; 
QUAD $0x4815b162cafe4075; QUAD $0x72480db16207c272; QUAD $0xd2724805b16212c2; QUAD $0x96ef25480d536203; QUAD $0x5162cdfe4075c162; QUAD $0x487e7162c4fe4855; QUAD $0xc272482df162ca6f; QUAD $0x0bc2724825f16206; QUAD $0x6219c272481df162; QUAD $0x3162cacc25486573; QUAD $0x48255362c2fe483d; QUAD $0xfe483d516296d425; QUAD $0x62c2fe483d5162c1; QUAD $0x55f162c8fe4875d1; QUAD $0x4825f16202c67248; QUAD $0x72481df1620dc672; QUAD $0xce6f487e716216c6; QUAD $0x62e8c82548457362; QUAD $0xd16296ec254825d3; QUAD $0x4855d162e9fe4855; QUAD $0x626f487e7162e8fe; QUAD $0x11c0724815b16223; QUAD $0x6213c072480db162; QUAD $0x53620ad0724805b1; QUAD $0x6dc16296ef25480d; QUAD $0xfe406d8162d5fe40; QUAD $0x07c3724815b162d3; QUAD $0x6212c372480db162; QUAD $0x536203d3724805b1; QUAD $0x6dc16296ef25480d; QUAD $0xfe485d5162d5fe40; QUAD $0x62c96f487e7162c4; QUAD $0xf16206c172482df1; QUAD $0x1df1620bc1724825; QUAD $0x486d736219c17248; QUAD $0xfe483d3162cacb25; QUAD $0x96d42548255362c3; QUAD $0x5162c1fe483d5162; QUAD $0x487dd162c2fe483d; QUAD $0xc572485df162c0fe; QUAD $0x0dc5724825f16202; QUAD $0x6216c572481df162; QUAD $0x4d7362cd6f487e71; QUAD $0x4825d362e8cf2548; QUAD $0xfe485dd16296e425; QUAD $0x62e0fe485dd162e1; QUAD $0xb16224626f487e71; QUAD $0x0db16211c1724815; QUAD $0x4805b16213c17248; QUAD $0x25480d53620ad172; QUAD $0xddfe4065c16296ef; QUAD $0xb162dcfe40658162; QUAD $0x0db16207c4724815; QUAD $0x4805b16212c47248; QUAD $0x25480d536203d472; QUAD $0xddfe4065c16296ef; QUAD $0x7162c4fe48655162; QUAD $0x482df162c86f487e; QUAD $0x724825f16206c072; QUAD $0xc072481df1620bc0; QUAD $0xcaca254875736219; QUAD $0x5362c4fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f8fe4845d162c2; QUAD $0xf16202c4724865f1; QUAD $0x1df1620dc4724825; QUAD $0x487e716216c47248; QUAD $0xce2548557362cc6f; QUAD $0x96dc254825d362e8; QUAD $0xd162d9fe4865d162; QUAD $0x487e7162d8fe4865; QUAD $0x724815b16225626f; QUAD $0xc272480db16211c2; QUAD $0x0ad2724805b16213; QUAD $0x6296ef25480d5362; QUAD 
$0x5d8162e5fe405dc1; QUAD $0x724815b162e5fe40; QUAD $0xc572480db16207c5; QUAD $0x03d5724805b16212; QUAD $0x6296ef25480d5362; QUAD $0x6d5162e5fe405dc1; QUAD $0x6f487e7162c4fe48; QUAD $0x06c772482df162cf; QUAD $0x620bc7724825f162; QUAD $0x736219c772481df1; QUAD $0x3d3162cac925487d; QUAD $0x2548255362c5fe48; QUAD $0xc1fe483d516296d4; QUAD $0xd162c2fe483d5162; QUAD $0x486df162f0fe484d; QUAD $0x724825f16202c372; QUAD $0xc372481df1620dc3; QUAD $0x62cb6f487e716216; QUAD $0xd362e8cd25485d73; QUAD $0x6dd16296d4254825; QUAD $0xfe486dd162d1fe48; QUAD $0x26626f487e7162d0; QUAD $0x6211c3724815b162; QUAD $0xb16213c372480db1; QUAD $0x0d53620ad3724805; QUAD $0x4055c16296ef2548; QUAD $0xeefe40558162edfe; QUAD $0x6207c6724815b162; QUAD $0xb16212c672480db1; QUAD $0x0d536203d6724805; QUAD $0x4055c16296ef2548; QUAD $0xc4fe48755162edfe; QUAD $0xf162ce6f487e7162; QUAD $0x25f16206c672482d; QUAD $0x481df1620bc67248; QUAD $0x254845736219c672; QUAD $0xc6fe483d3162cac8; QUAD $0x6296d42548255362; QUAD $0x3d5162c1fe483d51; QUAD $0xfe4855d162c2fe48; QUAD $0x02c2724875f162e8; QUAD $0x620dc2724825f162; QUAD $0x716216c272481df1; QUAD $0x48657362ca6f487e; QUAD $0x254825d362e8cc25; QUAD $0xc9fe4875d16296cc; QUAD $0x7162c8fe4875d162; QUAD $0x15b16227626f487e; QUAD $0x480db16211c47248; QUAD $0x724805b16213c472; QUAD $0xef25480d53620ad4; QUAD $0x62f5fe404dc16296; QUAD $0x15b162f7fe404d81; QUAD $0x480db16207c77248; QUAD $0x724805b16212c772; QUAD $0xef25480d536203d7; QUAD $0x62f5fe404dc16296; QUAD $0x7e7162c4fe487d51; QUAD $0x72482df162cd6f48; QUAD $0xc5724825f16206c5; QUAD $0x19c572481df1620b; QUAD $0x62cacf25484d7362; QUAD $0x255362c7fe483d31; QUAD $0x483d516296d42548; QUAD $0xc2fe483d5162c1fe; QUAD $0xf162e0fe485dd162; QUAD $0x25f16202c172487d; QUAD $0x481df1620dc17248; QUAD $0x6f487e716216c172; QUAD $0xe8cb25486d7362c9; QUAD $0x6296c4254825d362; QUAD $0x7dd162c1fe487dd1; QUAD $0x6f487e7162c0fe48; QUAD $0xc5724815b1622862; QUAD $0x13c572480db16211; QUAD $0x620ad5724805b162; QUAD $0xc16296ef25480d53; 
QUAD $0x4045a162fdfe4045; QUAD $0xc07248159162f8fe; QUAD $0x12c072480d916207; QUAD $0x6203d07248059162; QUAD $0xc16296ef25480d53; QUAD $0x48455162fdfe4045; QUAD $0xcc6f487e7162c4fe; QUAD $0x6206c472482df162; QUAD $0xf1620bc4724825f1; QUAD $0x55736219c472481d; QUAD $0x483d1162cace2548; QUAD $0xd42548255362c0fe; QUAD $0x62c1fe483d516296; QUAD $0x65d162c2fe483d51; QUAD $0x724845f162d8fe48; QUAD $0xc0724825f16202c0; QUAD $0x16c072481df1620d; QUAD $0x7362c86f487e7162; QUAD $0x25d362e8ca254875; QUAD $0x4845d16296fc2548; QUAD $0xf8fe4845d162f9fe; QUAD $0x6229626f487e7162; QUAD $0xb16211c6724815b1; QUAD $0x05b16213c672480d; QUAD $0x480d53620ad67248; QUAD $0xfe403d416296ef25; QUAD $0x62c1fe403d2162c5; QUAD $0x916207c172481591; QUAD $0x05916212c172480d; QUAD $0x480d536203d17248; QUAD $0xfe403d416296ef25; QUAD $0x62c4fe484d5162c5; QUAD $0x2df162cb6f487e71; QUAD $0x4825f16206c37248; QUAD $0x72481df1620bc372; QUAD $0xcd25485d736219c3; QUAD $0x62c1fe483d1162ca; QUAD $0x516296d425482553; QUAD $0x483d5162c1fe483d; QUAD $0xd0fe486dd162c2fe; QUAD $0x6202c772484df162; QUAD $0xf1620dc7724825f1; QUAD $0x7e716216c772481d; QUAD $0x25487d7362cf6f48; QUAD $0xf4254825d362e8c9; QUAD $0x62f1fe484dd16296; QUAD $0x7e7162f0fe484dd1; QUAD $0x4815b1622a626f48; QUAD $0x72480db16211c772; QUAD $0xd7724805b16213c7; QUAD $0x96ef25480d53620a; QUAD $0x2162cdfe40354162; QUAD $0x48159162cafe4035; QUAD $0x72480d916207c272; QUAD $0xd2724805916212c2; QUAD $0x96ef25480d536203; QUAD $0x5162cdfe40354162; QUAD $0x487e7162c4fe4855; QUAD $0xc272482df162ca6f; QUAD $0x0bc2724825f16206; QUAD $0x6219c272481df162; QUAD $0x1162cacc25486573; QUAD $0x48255362c2fe483d; QUAD $0xfe483d516296d425; QUAD $0x62c2fe483d5162c1; QUAD $0x55f162c8fe4875d1; QUAD $0x4825f16202c67248; QUAD $0x72481df1620dc672; QUAD $0xce6f487e716216c6; QUAD $0x62e8c82548457362; QUAD $0xd16296ec254825d3; QUAD $0x4855d162e9fe4855; QUAD $0x626f487e7162e8fe; QUAD $0x11c072481591622b; QUAD $0x6213c072480d9162; QUAD $0x53620ad072480591; QUAD 
$0x2d416296ef25480d; QUAD $0xfe402d2162d5fe40; QUAD $0x07c37248159162d3; QUAD $0x6212c372480d9162; QUAD $0x536203d372480591; QUAD $0x2d416296ef25480d; QUAD $0xfe485d5162d5fe40; QUAD $0x62c96f487e7162c4; QUAD $0xf16206c172482df1; QUAD $0x1df1620bc1724825; QUAD $0x486d736219c17248; QUAD $0xfe483d1162cacb25; QUAD $0x96d42548255362c3; QUAD $0x5162c1fe483d5162; QUAD $0x487dd162c2fe483d; QUAD $0xc572485df162c0fe; QUAD $0x0dc5724825f16202; QUAD $0x6216c572481df162; QUAD $0x4d7362cd6f487e71; QUAD $0x4825d362e8cf2548; QUAD $0xfe485dd16296e425; QUAD $0x62e0fe485dd162e1; QUAD $0x91622c626f487e71; QUAD $0x0d916211c1724815; QUAD $0x4805916213c17248; QUAD $0x25480d53620ad172; QUAD $0xddfe4025416296ef; QUAD $0x9162dcfe40252162; QUAD $0x0d916207c4724815; QUAD $0x4805916212c47248; QUAD $0x25480d536203d472; QUAD $0xddfe4025416296ef; QUAD $0x7162c4fe48655162; QUAD $0x482df162c86f487e; QUAD $0x724825f16206c072; QUAD $0xc072481df1620bc0; QUAD $0xcaca254875736219; QUAD $0x5362c4fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f8fe4845d162c2; QUAD $0xf16202c4724865f1; QUAD $0x1df1620dc4724825; QUAD $0x487e716216c47248; QUAD $0xce2548557362cc6f; QUAD $0x96dc254825d362e8; QUAD $0xd162d9fe4865d162; QUAD $0x487e7162d8fe4865; QUAD $0x72481591622d626f; QUAD $0xc272480d916211c2; QUAD $0x0ad2724805916213; QUAD $0x6296ef25480d5362; QUAD $0x1d2162e5fe401d41; QUAD $0x7248159162e5fe40; QUAD $0xc572480d916207c5; QUAD $0x03d5724805916212; QUAD $0x6296ef25480d5362; QUAD $0x6d5162e5fe401d41; QUAD $0x6f487e7162c4fe48; QUAD $0x06c772482df162cf; QUAD $0x620bc7724825f162; QUAD $0x736219c772481df1; QUAD $0x3d1162cac925487d; QUAD $0x2548255362c5fe48; QUAD $0xc1fe483d516296d4; QUAD $0xd162c2fe483d5162; QUAD $0x486df162f0fe484d; QUAD $0x724825f16202c372; QUAD $0xc372481df1620dc3; QUAD $0x62cb6f487e716216; QUAD $0xd362e8cd25485d73; QUAD $0x6dd16296d4254825; QUAD $0xfe486dd162d1fe48; QUAD $0x2e626f487e7162d0; QUAD $0x6211c37248159162; QUAD $0x916213c372480d91; QUAD $0x0d53620ad3724805; 
QUAD $0x4015416296ef2548; QUAD $0xeefe40152162edfe; QUAD $0x6207c67248159162; QUAD $0x916212c672480d91; QUAD $0x0d536203d6724805; QUAD $0x4015416296ef2548; QUAD $0xc4fe48755162edfe; QUAD $0xf162ce6f487e7162; QUAD $0x25f16206c672482d; QUAD $0x481df1620bc67248; QUAD $0x254845736219c672; QUAD $0xc6fe483d1162cac8; QUAD $0x6296d42548255362; QUAD $0x3d5162c1fe483d51; QUAD $0xfe4855d162c2fe48; QUAD $0x02c2724875f162e8; QUAD $0x620dc2724825f162; QUAD $0x716216c272481df1; QUAD $0x48657362ca6f487e; QUAD $0x254825d362e8cc25; QUAD $0xc9fe4875d16296cc; QUAD $0x7162c8fe4875d162; QUAD $0x1591622f626f487e; QUAD $0x480d916211c47248; QUAD $0x724805916213c472; QUAD $0xef25480d53620ad4; QUAD $0x62f5fe400d416296; QUAD $0x159162f7fe400d21; QUAD $0x480d916207c77248; QUAD $0x724805916212c772; QUAD $0xef25480d536203d7; QUAD $0x62f5fe400d416296; QUAD $0x7e7162c4fe487d51; QUAD $0x72482df162cd6f48; QUAD $0xc5724825f16206c5; QUAD $0x19c572481df1620b; QUAD $0x62cacf25484d7362; QUAD $0x255362c7fe483d11; QUAD $0x483d516296d42548; QUAD $0xc2fe483d5162c1fe; QUAD $0xf162e0fe485dd162; QUAD $0x25f16202c172487d; QUAD $0x481df1620dc17248; QUAD $0x6f487e716216c172; QUAD $0xe8cb25486d7362c9; QUAD $0x6296c4254825d362; QUAD $0x7dd162c1fe487dd1; QUAD $0x6f487e7162c0fe48; QUAD $0xc572481591623062; QUAD $0x13c572480d916211; QUAD $0x620ad57248059162; QUAD $0x416296ef25480d53; QUAD $0x40050162fdfe4005; QUAD $0xc0724815b162f8fe; QUAD $0x12c072480db16207; QUAD $0x6203d0724805b162; QUAD $0x416296ef25480d53; QUAD $0x01ee8348fdfe4005
-	JE                   lastLoop
-	ADDQ                 $8, R13
-	MOVQ                 (R13), R14
-	QUAD                 $0x7162c4fe48455162; QUAD $0x482df162cc6f487e; QUAD $0x724825f16206c472; QUAD $0xc472481df1620bc4; QUAD $0xcace254855736219; QUAD $0x5362c0fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62d8fe4865d162c2; QUAD $0xf16202c0724845f1; QUAD $0x1df1620dc0724825; QUAD $0x487e716216c07248; QUAD $0xca2548757362c86f; QUAD $0x96fc254825d362e8; QUAD $0xd162f9fe4845d162; QUAD $0x487e7162f8fe4845; WORD $0x626f; BYTE $0x31
-	TESTQ                $(1<<0), R14
-	JE                   skipNext0
-	MOVQ                 0*24(AX), R9
-	LONG                 $0x487cc162; WORD $0x0410; BYTE $0x09
-
-skipNext0:
-	QUAD  $0x7162c4fe484d5162; QUAD $0x482df162cb6f487e; QUAD $0x724825f16206c372; QUAD $0xc372481df1620bc3; QUAD $0xcacd25485d736219; QUAD $0x5362c1fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62d0fe486dd162c2; QUAD $0xf16202c772484df1; QUAD $0x1df1620dc7724825; QUAD $0x487e716216c77248; QUAD $0xc925487d7362cf6f; QUAD $0x96f4254825d362e8; QUAD $0xd162f1fe484dd162; QUAD $0x487e7162f0fe484d; WORD $0x626f; BYTE $0x32
-	TESTQ $(1<<1), R14
-	JE    skipNext1
-	MOVQ  1*24(AX), R9
-	LONG  $0x487cc162; WORD $0x0c10; BYTE $0x09
-
-skipNext1:
-	QUAD  $0x7162c4fe48555162; QUAD $0x482df162ca6f487e; QUAD $0x724825f16206c272; QUAD $0xc272481df1620bc2; QUAD $0xcacc254865736219; QUAD $0x5362c2fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62c8fe4875d162c2; QUAD $0xf16202c6724855f1; QUAD $0x1df1620dc6724825; QUAD $0x487e716216c67248; QUAD $0xc82548457362ce6f; QUAD $0x96ec254825d362e8; QUAD $0xd162e9fe4855d162; QUAD $0x487e7162e8fe4855; WORD $0x626f; BYTE $0x33
-	TESTQ $(1<<2), R14
-	JE    skipNext2
-	MOVQ  2*24(AX), R9
-	LONG  $0x487cc162; WORD $0x1410; BYTE $0x09
-
-skipNext2:
-	QUAD  $0x7162c4fe485d5162; QUAD $0x482df162c96f487e; QUAD $0x724825f16206c172; QUAD $0xc172481df1620bc1; QUAD $0xcacb25486d736219; QUAD $0x5362c3fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62c0fe487dd162c2; QUAD $0xf16202c572485df1; QUAD $0x1df1620dc5724825; QUAD $0x487e716216c57248; QUAD $0xcf25484d7362cd6f; QUAD $0x96e4254825d362e8; QUAD $0xd162e1fe485dd162; QUAD $0x487e7162e0fe485d; WORD $0x626f; BYTE $0x34
-	TESTQ $(1<<3), R14
-	JE    skipNext3
-	MOVQ  3*24(AX), R9
-	LONG  $0x487cc162; WORD $0x1c10; BYTE $0x09
-
-skipNext3:
-	QUAD  $0x7162c4fe48655162; QUAD $0x482df162c86f487e; QUAD $0x724825f16206c072; QUAD $0xc072481df1620bc0; QUAD $0xcaca254875736219; QUAD $0x5362c4fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f8fe4845d162c2; QUAD $0xf16202c4724865f1; QUAD $0x1df1620dc4724825; QUAD $0x487e716216c47248; QUAD $0xce2548557362cc6f; QUAD $0x96dc254825d362e8; QUAD $0xd162d9fe4865d162; QUAD $0x487e7162d8fe4865; WORD $0x626f; BYTE $0x35
-	TESTQ $(1<<4), R14
-	JE    skipNext4
-	MOVQ  4*24(AX), R9
-	LONG  $0x487cc162; WORD $0x2410; BYTE $0x09
-
-skipNext4:
-	QUAD  $0x7162c4fe486d5162; QUAD $0x482df162cf6f487e; QUAD $0x724825f16206c772; QUAD $0xc772481df1620bc7; QUAD $0xcac925487d736219; QUAD $0x5362c5fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f0fe484dd162c2; QUAD $0xf16202c372486df1; QUAD $0x1df1620dc3724825; QUAD $0x487e716216c37248; QUAD $0xcd25485d7362cb6f; QUAD $0x96d4254825d362e8; QUAD $0xd162d1fe486dd162; QUAD $0x487e7162d0fe486d; WORD $0x626f; BYTE $0x36
-	TESTQ $(1<<5), R14
-	JE    skipNext5
-	MOVQ  5*24(AX), R9
-	LONG  $0x487cc162; WORD $0x2c10; BYTE $0x09
-
-skipNext5:
-	QUAD  $0x7162c4fe48755162; QUAD $0x482df162ce6f487e; QUAD $0x724825f16206c672; QUAD $0xc672481df1620bc6; QUAD $0xcac8254845736219; QUAD $0x5362c6fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62e8fe4855d162c2; QUAD $0xf16202c2724875f1; QUAD $0x1df1620dc2724825; QUAD $0x487e716216c27248; QUAD $0xcc2548657362ca6f; QUAD $0x96cc254825d362e8; QUAD $0xd162c9fe4875d162; QUAD $0x487e7162c8fe4875; WORD $0x626f; BYTE $0x37
-	TESTQ $(1<<6), R14
-	JE    skipNext6
-	MOVQ  6*24(AX), R9
-	LONG  $0x487cc162; WORD $0x3410; BYTE $0x09
-
-skipNext6:
-	QUAD  $0x7162c4fe487d5162; QUAD $0x482df162cd6f487e; QUAD $0x724825f16206c572; QUAD $0xc572481df1620bc5; QUAD $0xcacf25484d736219; QUAD $0x5362c7fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62e0fe485dd162c2; QUAD $0xf16202c172487df1; QUAD $0x1df1620dc1724825; QUAD $0x487e716216c17248; QUAD $0xcb25486d7362c96f; QUAD $0x96c4254825d362e8; QUAD $0xd162c1fe487dd162; QUAD $0x487e7162c0fe487d; WORD $0x626f; BYTE $0x38
-	TESTQ $(1<<7), R14
-	JE    skipNext7
-	MOVQ  7*24(AX), R9
-	LONG  $0x487cc162; WORD $0x3c10; BYTE $0x09
-
-skipNext7:
-	QUAD  $0x7162c4fe48455162; QUAD $0x482df162cc6f487e; QUAD $0x724825f16206c472; QUAD $0xc472481df1620bc4; QUAD $0xcace254855736219; QUAD $0x5362c0fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62d8fe4865d162c2; QUAD $0xf16202c0724845f1; QUAD $0x1df1620dc0724825; QUAD $0x487e716216c07248; QUAD $0xca2548757362c86f; QUAD $0x96fc254825d362e8; QUAD $0xd162f9fe4845d162; QUAD $0x487e7162f8fe4845; WORD $0x626f; BYTE $0x39
-	TESTQ $(1<<8), R14
-	JE    skipNext8
-	MOVQ  8*24(AX), R9
-	LONG  $0x487c4162; WORD $0x0410; BYTE $0x09
-
-skipNext8:
-	QUAD  $0x7162c4fe484d5162; QUAD $0x482df162cb6f487e; QUAD $0x724825f16206c372; QUAD $0xc372481df1620bc3; QUAD $0xcacd25485d736219; QUAD $0x5362c1fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62d0fe486dd162c2; QUAD $0xf16202c772484df1; QUAD $0x1df1620dc7724825; QUAD $0x487e716216c77248; QUAD $0xc925487d7362cf6f; QUAD $0x96f4254825d362e8; QUAD $0xd162f1fe484dd162; QUAD $0x487e7162f0fe484d; WORD $0x626f; BYTE $0x3a
-	TESTQ $(1<<9), R14
-	JE    skipNext9
-	MOVQ  9*24(AX), R9
-	LONG  $0x487c4162; WORD $0x0c10; BYTE $0x09
-
-skipNext9:
-	QUAD  $0x7162c4fe48555162; QUAD $0x482df162ca6f487e; QUAD $0x724825f16206c272; QUAD $0xc272481df1620bc2; QUAD $0xcacc254865736219; QUAD $0x5362c2fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62c8fe4875d162c2; QUAD $0xf16202c6724855f1; QUAD $0x1df1620dc6724825; QUAD $0x487e716216c67248; QUAD $0xc82548457362ce6f; QUAD $0x96ec254825d362e8; QUAD $0xd162e9fe4855d162; QUAD $0x487e7162e8fe4855; WORD $0x626f; BYTE $0x3b
-	TESTQ $(1<<10), R14
-	JE    skipNext10
-	MOVQ  10*24(AX), R9
-	LONG  $0x487c4162; WORD $0x1410; BYTE $0x09
-
-skipNext10:
-	QUAD  $0x7162c4fe485d5162; QUAD $0x482df162c96f487e; QUAD $0x724825f16206c172; QUAD $0xc172481df1620bc1; QUAD $0xcacb25486d736219; QUAD $0x5362c3fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62c0fe487dd162c2; QUAD $0xf16202c572485df1; QUAD $0x1df1620dc5724825; QUAD $0x487e716216c57248; QUAD $0xcf25484d7362cd6f; QUAD $0x96e4254825d362e8; QUAD $0xd162e1fe485dd162; QUAD $0x487e7162e0fe485d; WORD $0x626f; BYTE $0x3c
-	TESTQ $(1<<11), R14
-	JE    skipNext11
-	MOVQ  11*24(AX), R9
-	LONG  $0x487c4162; WORD $0x1c10; BYTE $0x09
-
-skipNext11:
-	QUAD  $0x7162c4fe48655162; QUAD $0x482df162c86f487e; QUAD $0x724825f16206c072; QUAD $0xc072481df1620bc0; QUAD $0xcaca254875736219; QUAD $0x5362c4fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f8fe4845d162c2; QUAD $0xf16202c4724865f1; QUAD $0x1df1620dc4724825; QUAD $0x487e716216c47248; QUAD $0xce2548557362cc6f; QUAD $0x96dc254825d362e8; QUAD $0xd162d9fe4865d162; QUAD $0x487e7162d8fe4865; WORD $0x626f; BYTE $0x3d
-	TESTQ $(1<<12), R14
-	JE    skipNext12
-	MOVQ  12*24(AX), R9
-	LONG  $0x487c4162; WORD $0x2410; BYTE $0x09
-
-skipNext12:
-	QUAD  $0x7162c4fe486d5162; QUAD $0x482df162cf6f487e; QUAD $0x724825f16206c772; QUAD $0xc772481df1620bc7; QUAD $0xcac925487d736219; QUAD $0x5362c5fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62f0fe484dd162c2; QUAD $0xf16202c372486df1; QUAD $0x1df1620dc3724825; QUAD $0x487e716216c37248; QUAD $0xcd25485d7362cb6f; QUAD $0x96d4254825d362e8; QUAD $0xd162d1fe486dd162; QUAD $0x487e7162d0fe486d; WORD $0x626f; BYTE $0x3e
-	TESTQ $(1<<13), R14
-	JE    skipNext13
-	MOVQ  13*24(AX), R9
-	LONG  $0x487c4162; WORD $0x2c10; BYTE $0x09
-
-skipNext13:
-	QUAD  $0x7162c4fe48755162; QUAD $0x482df162ce6f487e; QUAD $0x724825f16206c672; QUAD $0xc672481df1620bc6; QUAD $0xcac8254845736219; QUAD $0x5362c6fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62e8fe4855d162c2; QUAD $0xf16202c2724875f1; QUAD $0x1df1620dc2724825; QUAD $0x487e716216c27248; QUAD $0xcc2548657362ca6f; QUAD $0x96cc254825d362e8; QUAD $0xd162c9fe4875d162; QUAD $0x487e7162c8fe4875; WORD $0x626f; BYTE $0x3f
-	TESTQ $(1<<14), R14
-	JE    skipNext14
-	MOVQ  14*24(AX), R9
-	LONG  $0x487c4162; WORD $0x3410; BYTE $0x09
-
-skipNext14:
-	QUAD  $0x7162c4fe487d5162; QUAD $0x482df162cd6f487e; QUAD $0x724825f16206c572; QUAD $0xc572481df1620bc5; QUAD $0xcacf25484d736219; QUAD $0x5362c7fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62e0fe485dd162c2; QUAD $0xf16202c172487df1; QUAD $0x1df1620dc1724825; QUAD $0x487e716216c17248; QUAD $0xcb25486d7362c96f; QUAD $0x96c4254825d362e8; QUAD $0xd162c1fe487dd162; QUAD $0x487e7162c0fe487d; WORD $0x626f; BYTE $0x40
-	TESTQ $(1<<15), R14
-	JE    skipNext15
-	MOVQ  15*24(AX), R9
-	LONG  $0x487c4162; WORD $0x3c10; BYTE $0x09
-
-skipNext15:
-	QUAD $0xd162d86f487e7162; QUAD $0x7dd16224046f487e; QUAD $0x6f487e7162c3fe49; QUAD $0x244c6f487ed162d9; QUAD $0x62cbfe4975d16201; QUAD $0x7ed162da6f487e71; QUAD $0x6dd1620224546f48; QUAD $0x6f487e7162d3fe49; QUAD $0x245c6f487ed162db; QUAD $0x62dbfe4965d16203; QUAD $0x7ed162dc6f487e71; QUAD $0x5dd1620424646f48; QUAD $0x6f487e7162e3fe49; QUAD $0x246c6f487ed162dd; QUAD $0x62ebfe4955d16205; QUAD $0x7ed162de6f487e71; QUAD $0x4dd1620624746f48; QUAD $0x6f487e7162f3fe49; QUAD $0x247c6f487ed162df; QUAD $0xc4fbfe4945d16207; LONG $0xce92fbc1
-	JMP  lloop
-
-lastLoop:
-	QUAD         $0x7162c4fe48455162; QUAD $0x482df162cc6f487e; QUAD $0x724825f16206c472; QUAD $0xc472481df1620bc4; QUAD $0xcace254855736219; QUAD $0x5362c0fe483d3162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62d8fe4865d162c2; QUAD $0xf16202c0724845f1; QUAD $0x1df1620dc0724825; QUAD $0x487e716216c07248; QUAD $0xca2548757362c86f; QUAD $0x96fc254825d362e8; QUAD $0xd162f9fe4845d162; QUAD $0x487e7162f8fe4845; QUAD $0xfe484d516231626f; QUAD $0x62cb6f487e7162c4; QUAD $0xf16206c372482df1; QUAD $0x1df1620bc3724825; QUAD $0x485d736219c37248; QUAD $0xfe483d3162cacd25; QUAD $0x96d42548255362c1; QUAD $0x5162c1fe483d5162; QUAD $0x486dd162c2fe483d; QUAD $0xc772484df162d0fe; QUAD $0x0dc7724825f16202; QUAD $0x6216c772481df162; QUAD $0x7d7362cf6f487e71; QUAD $0x4825d362e8c92548; QUAD $0xfe484dd16296f425; QUAD $0x62f0fe484dd162f1; QUAD $0x516232626f487e71; QUAD $0x487e7162c4fe4855; QUAD $0xc272482df162ca6f; QUAD $0x0bc2724825f16206; QUAD $0x6219c272481df162; QUAD $0x3162cacc25486573; QUAD $0x48255362c2fe483d; QUAD $0xfe483d516296d425; QUAD $0x62c2fe483d5162c1; QUAD $0x55f162c8fe4875d1; QUAD $0x4825f16202c67248; QUAD $0x72481df1620dc672; QUAD $0xce6f487e716216c6; QUAD $0x62e8c82548457362; QUAD $0xd16296ec254825d3; QUAD $0x4855d162e9fe4855; QUAD $0x626f487e7162e8fe; QUAD $0x62c4fe485d516233; QUAD $0x2df162c96f487e71; QUAD $0x4825f16206c17248; QUAD $0x72481df1620bc172; QUAD $0xcb25486d736219c1; QUAD $0x62c3fe483d3162ca; QUAD $0x516296d425482553; QUAD $0x483d5162c1fe483d; QUAD $0xc0fe487dd162c2fe; QUAD $0x6202c572485df162; QUAD $0xf1620dc5724825f1; QUAD $0x7e716216c572481d; QUAD $0x25484d7362cd6f48; QUAD $0xe4254825d362e8cf; QUAD $0x62e1fe485dd16296; QUAD $0x7e7162e0fe485dd1; QUAD $0x4865516234626f48; QUAD $0xc86f487e7162c4fe; QUAD $0x6206c072482df162; QUAD $0xf1620bc0724825f1; QUAD $0x75736219c072481d; QUAD $0x483d3162caca2548; QUAD $0xd42548255362c4fe; QUAD $0x62c1fe483d516296; QUAD $0x45d162c2fe483d51; QUAD $0x724865f162f8fe48; QUAD $0xc4724825f16202c4; QUAD 
$0x16c472481df1620d; QUAD $0x7362cc6f487e7162; QUAD $0x25d362e8ce254855; QUAD $0x4865d16296dc2548; QUAD $0xd8fe4865d162d9fe; QUAD $0x6235626f487e7162; QUAD $0x7e7162c4fe486d51; QUAD $0x72482df162cf6f48; QUAD $0xc7724825f16206c7; QUAD $0x19c772481df1620b; QUAD $0x62cac925487d7362; QUAD $0x255362c5fe483d31; QUAD $0x483d516296d42548; QUAD $0xc2fe483d5162c1fe; QUAD $0xf162f0fe484dd162; QUAD $0x25f16202c372486d; QUAD $0x481df1620dc37248; QUAD $0x6f487e716216c372; QUAD $0xe8cd25485d7362cb; QUAD $0x6296d4254825d362; QUAD $0x6dd162d1fe486dd1; QUAD $0x6f487e7162d0fe48; QUAD $0xc4fe487551623662; QUAD $0xf162ce6f487e7162; QUAD $0x25f16206c672482d; QUAD $0x481df1620bc67248; QUAD $0x254845736219c672; QUAD $0xc6fe483d3162cac8; QUAD $0x6296d42548255362; QUAD $0x3d5162c1fe483d51; QUAD $0xfe4855d162c2fe48; QUAD $0x02c2724875f162e8; QUAD $0x620dc2724825f162; QUAD $0x716216c272481df1; QUAD $0x48657362ca6f487e; QUAD $0x254825d362e8cc25; QUAD $0xc9fe4875d16296cc; QUAD $0x7162c8fe4875d162; QUAD $0x7d516237626f487e; QUAD $0x6f487e7162c4fe48; QUAD $0x06c572482df162cd; QUAD $0x620bc5724825f162; QUAD $0x736219c572481df1; QUAD $0x3d3162cacf25484d; QUAD $0x2548255362c7fe48; QUAD $0xc1fe483d516296d4; QUAD $0xd162c2fe483d5162; QUAD $0x487df162e0fe485d; QUAD $0x724825f16202c172; QUAD $0xc172481df1620dc1; QUAD $0x62c96f487e716216; QUAD $0xd362e8cb25486d73; QUAD $0x7dd16296c4254825; QUAD $0xfe487dd162c1fe48; QUAD $0x38626f487e7162c0; QUAD $0x7162c4fe48455162; QUAD $0x482df162cc6f487e; QUAD $0x724825f16206c472; QUAD $0xc472481df1620bc4; QUAD $0xcace254855736219; QUAD $0x5362c0fe483d1162; QUAD $0x3d516296d4254825; QUAD $0xfe483d5162c1fe48; QUAD $0x62d8fe4865d162c2; QUAD $0xf16202c0724845f1; QUAD $0x1df1620dc0724825; QUAD $0x487e716216c07248; QUAD $0xca2548757362c86f; QUAD $0x96fc254825d362e8; QUAD $0xd162f9fe4845d162; QUAD $0x487e7162f8fe4845; QUAD $0xfe484d516239626f; QUAD $0x62cb6f487e7162c4; QUAD $0xf16206c372482df1; QUAD $0x1df1620bc3724825; QUAD $0x485d736219c37248; QUAD $0xfe483d1162cacd25; 
QUAD $0x96d42548255362c1; QUAD $0x5162c1fe483d5162; QUAD $0x486dd162c2fe483d; QUAD $0xc772484df162d0fe; QUAD $0x0dc7724825f16202; QUAD $0x6216c772481df162; QUAD $0x7d7362cf6f487e71; QUAD $0x4825d362e8c92548; QUAD $0xfe484dd16296f425; QUAD $0x62f0fe484dd162f1; QUAD $0x51623a626f487e71; QUAD $0x487e7162c4fe4855; QUAD $0xc272482df162ca6f; QUAD $0x0bc2724825f16206; QUAD $0x6219c272481df162; QUAD $0x1162cacc25486573; QUAD $0x48255362c2fe483d; QUAD $0xfe483d516296d425; QUAD $0x62c2fe483d5162c1; QUAD $0x55f162c8fe4875d1; QUAD $0x4825f16202c67248; QUAD $0x72481df1620dc672; QUAD $0xce6f487e716216c6; QUAD $0x62e8c82548457362; QUAD $0xd16296ec254825d3; QUAD $0x4855d162e9fe4855; QUAD $0x626f487e7162e8fe; QUAD $0x62c4fe485d51623b; QUAD $0x2df162c96f487e71; QUAD $0x4825f16206c17248; QUAD $0x72481df1620bc172; QUAD $0xcb25486d736219c1; QUAD $0x62c3fe483d1162ca; QUAD $0x516296d425482553; QUAD $0x483d5162c1fe483d; QUAD $0xc0fe487dd162c2fe; QUAD $0x6202c572485df162; QUAD $0xf1620dc5724825f1; QUAD $0x7e716216c572481d; QUAD $0x25484d7362cd6f48; QUAD $0xe4254825d362e8cf; QUAD $0x62e1fe485dd16296; QUAD $0x7e7162e0fe485dd1; QUAD $0x486551623c626f48; QUAD $0xc86f487e7162c4fe; QUAD $0x6206c072482df162; QUAD $0xf1620bc0724825f1; QUAD $0x75736219c072481d; QUAD $0x483d1162caca2548; QUAD $0xd42548255362c4fe; QUAD $0x62c1fe483d516296; QUAD $0x45d162c2fe483d51; QUAD $0x724865f162f8fe48; QUAD $0xc4724825f16202c4; QUAD $0x16c472481df1620d; QUAD $0x7362cc6f487e7162; QUAD $0x25d362e8ce254855; QUAD $0x4865d16296dc2548; QUAD $0xd8fe4865d162d9fe; QUAD $0x623d626f487e7162; QUAD $0x7e7162c4fe486d51; QUAD $0x72482df162cf6f48; QUAD $0xc7724825f16206c7; QUAD $0x19c772481df1620b; QUAD $0x62cac925487d7362; QUAD $0x255362c5fe483d11; QUAD $0x483d516296d42548; QUAD $0xc2fe483d5162c1fe; QUAD $0xf162f0fe484dd162; QUAD $0x25f16202c372486d; QUAD $0x481df1620dc37248; QUAD $0x6f487e716216c372; QUAD $0xe8cd25485d7362cb; QUAD $0x6296d4254825d362; QUAD $0x6dd162d1fe486dd1; QUAD $0x6f487e7162d0fe48; QUAD 
$0xc4fe487551623e62; QUAD $0xf162ce6f487e7162; QUAD $0x25f16206c672482d; QUAD $0x481df1620bc67248; QUAD $0x254845736219c672; QUAD $0xc6fe483d1162cac8; QUAD $0x6296d42548255362; QUAD $0x3d5162c1fe483d51; QUAD $0xfe4855d162c2fe48; QUAD $0x02c2724875f162e8; QUAD $0x620dc2724825f162; QUAD $0x716216c272481df1; QUAD $0x48657362ca6f487e; QUAD $0x254825d362e8cc25; QUAD $0xc9fe4875d16296cc; QUAD $0x7162c8fe4875d162; QUAD $0x7d51623f626f487e; QUAD $0x6f487e7162c4fe48; QUAD $0x06c572482df162cd; QUAD $0x620bc5724825f162; QUAD $0x736219c572481df1; QUAD $0x3d1162cacf25484d; QUAD $0x2548255362c7fe48; QUAD $0xc1fe483d516296d4; QUAD $0xd162c2fe483d5162; QUAD $0x487df162e0fe485d; QUAD $0x724825f16202c172; QUAD $0xc172481df1620dc1; QUAD $0x62c96f487e716216; QUAD $0xd362e8cb25486d73; QUAD $0x7dd16296c4254825; QUAD $0xfe487dd162c1fe48; QUAD $0x40626f487e7162c0; QUAD $0xd162d86f487e7162; QUAD $0x7dd16224046f487e; QUAD $0x6f487e7162c3fe49; QUAD $0x244c6f487ed162d9; QUAD $0x62cbfe4975d16201; QUAD $0x7ed162da6f487e71; QUAD $0x6dd1620224546f48; QUAD $0x6f487e7162d3fe49; QUAD $0x245c6f487ed162db; QUAD $0x62dbfe4965d16203; QUAD $0x7ed162dc6f487e71; QUAD $0x5dd1620424646f48; QUAD $0x6f487e7162e3fe49; QUAD $0x246c6f487ed162dd; QUAD $0x62ebfe4955d16205; QUAD $0x7ed162de6f487e71; QUAD $0x4dd1620624746f48; QUAD $0x6f487e7162f3fe49; QUAD $0x247c6f487ed162df; QUAD $0x62fbfe4945d16207; QUAD $0x7ef162077f487ef1; QUAD $0x487ef162014f7f48; QUAD $0x7f487ef16202577f; QUAD $0x677f487ef162035f; QUAD $0x056f7f487ef16204; QUAD $0x6206777f487ef162; LONG $0x7f487ef1; WORD $0x077f
-	VZEROUPPER
-	RET
-
-DATA PSHUFFLE_BYTE_FLIP_MASK<>+0x000(SB)/8, $0x0405060700010203
-DATA PSHUFFLE_BYTE_FLIP_MASK<>+0x008(SB)/8, $0x0c0d0e0f08090a0b
-DATA PSHUFFLE_BYTE_FLIP_MASK<>+0x010(SB)/8, $0x0405060700010203
-DATA PSHUFFLE_BYTE_FLIP_MASK<>+0x018(SB)/8, $0x0c0d0e0f08090a0b
-DATA PSHUFFLE_BYTE_FLIP_MASK<>+0x020(SB)/8, $0x0405060700010203
-DATA PSHUFFLE_BYTE_FLIP_MASK<>+0x028(SB)/8, $0x0c0d0e0f08090a0b
-DATA PSHUFFLE_BYTE_FLIP_MASK<>+0x030(SB)/8, $0x0405060700010203
-DATA PSHUFFLE_BYTE_FLIP_MASK<>+0x038(SB)/8, $0x0c0d0e0f08090a0b
-GLOBL PSHUFFLE_BYTE_FLIP_MASK<>(SB), 8, $64
-DATA PSHUFFLE_TRANSPOSE16_MASK1<>+0x000(SB)/8, $0x0000000000000000
-DATA PSHUFFLE_TRANSPOSE16_MASK1<>+0x008(SB)/8, $0x0000000000000001
-DATA PSHUFFLE_TRANSPOSE16_MASK1<>+0x010(SB)/8, $0x0000000000000008
-DATA PSHUFFLE_TRANSPOSE16_MASK1<>+0x018(SB)/8, $0x0000000000000009
-DATA PSHUFFLE_TRANSPOSE16_MASK1<>+0x020(SB)/8, $0x0000000000000004
-DATA PSHUFFLE_TRANSPOSE16_MASK1<>+0x028(SB)/8, $0x0000000000000005
-DATA PSHUFFLE_TRANSPOSE16_MASK1<>+0x030(SB)/8, $0x000000000000000C
-DATA PSHUFFLE_TRANSPOSE16_MASK1<>+0x038(SB)/8, $0x000000000000000D
-GLOBL PSHUFFLE_TRANSPOSE16_MASK1<>(SB), 8, $64
-DATA PSHUFFLE_TRANSPOSE16_MASK2<>+0x000(SB)/8, $0x0000000000000002
-DATA PSHUFFLE_TRANSPOSE16_MASK2<>+0x008(SB)/8, $0x0000000000000003
-DATA PSHUFFLE_TRANSPOSE16_MASK2<>+0x010(SB)/8, $0x000000000000000A
-DATA PSHUFFLE_TRANSPOSE16_MASK2<>+0x018(SB)/8, $0x000000000000000B
-DATA PSHUFFLE_TRANSPOSE16_MASK2<>+0x020(SB)/8, $0x0000000000000006
-DATA PSHUFFLE_TRANSPOSE16_MASK2<>+0x028(SB)/8, $0x0000000000000007
-DATA PSHUFFLE_TRANSPOSE16_MASK2<>+0x030(SB)/8, $0x000000000000000E
-DATA PSHUFFLE_TRANSPOSE16_MASK2<>+0x038(SB)/8, $0x000000000000000F
-GLOBL PSHUFFLE_TRANSPOSE16_MASK2<>(SB), 8, $64
diff --git a/vendor/github.com/minio/sha256-simd/sha256block_amd64.go b/vendor/github.com/minio/sha256-simd/sha256block_amd64.go
deleted file mode 100644
index e536f54e..00000000
--- a/vendor/github.com/minio/sha256-simd/sha256block_amd64.go
+++ /dev/null
@@ -1,31 +0,0 @@
-//go:build !noasm && !appengine && gc
-// +build !noasm,!appengine,gc
-
-/*
- * Minio Cloud Storage, (C) 2016 Minio, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package sha256
-
-func blockArmSha2Go(dig *digest, p []byte) {
-	panic("blockArmSha2Go called unexpectedly")
-}
-
-//go:noescape
-func blockIntelSha(h *[8]uint32, message []uint8)
-
-func blockIntelShaGo(dig *digest, p []byte) {
-	blockIntelSha(&dig.h, p)
-}
diff --git a/vendor/github.com/minio/sha256-simd/sha256block_amd64.s b/vendor/github.com/minio/sha256-simd/sha256block_amd64.s
deleted file mode 100644
index c98a1d8f..00000000
--- a/vendor/github.com/minio/sha256-simd/sha256block_amd64.s
+++ /dev/null
@@ -1,266 +0,0 @@
-//+build !noasm,!appengine,gc
-
-// SHA intrinsic version of SHA256
-
-// Kristofer Peterson, (C) 2018.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-
-#include "textflag.h"
-
-DATA K<>+0x00(SB)/4, $0x428a2f98
-DATA K<>+0x04(SB)/4, $0x71374491
-DATA K<>+0x08(SB)/4, $0xb5c0fbcf
-DATA K<>+0x0c(SB)/4, $0xe9b5dba5
-DATA K<>+0x10(SB)/4, $0x3956c25b
-DATA K<>+0x14(SB)/4, $0x59f111f1
-DATA K<>+0x18(SB)/4, $0x923f82a4
-DATA K<>+0x1c(SB)/4, $0xab1c5ed5
-DATA K<>+0x20(SB)/4, $0xd807aa98
-DATA K<>+0x24(SB)/4, $0x12835b01
-DATA K<>+0x28(SB)/4, $0x243185be
-DATA K<>+0x2c(SB)/4, $0x550c7dc3
-DATA K<>+0x30(SB)/4, $0x72be5d74
-DATA K<>+0x34(SB)/4, $0x80deb1fe
-DATA K<>+0x38(SB)/4, $0x9bdc06a7
-DATA K<>+0x3c(SB)/4, $0xc19bf174
-DATA K<>+0x40(SB)/4, $0xe49b69c1
-DATA K<>+0x44(SB)/4, $0xefbe4786
-DATA K<>+0x48(SB)/4, $0x0fc19dc6
-DATA K<>+0x4c(SB)/4, $0x240ca1cc
-DATA K<>+0x50(SB)/4, $0x2de92c6f
-DATA K<>+0x54(SB)/4, $0x4a7484aa
-DATA K<>+0x58(SB)/4, $0x5cb0a9dc
-DATA K<>+0x5c(SB)/4, $0x76f988da
-DATA K<>+0x60(SB)/4, $0x983e5152
-DATA K<>+0x64(SB)/4, $0xa831c66d
-DATA K<>+0x68(SB)/4, $0xb00327c8
-DATA K<>+0x6c(SB)/4, $0xbf597fc7
-DATA K<>+0x70(SB)/4, $0xc6e00bf3
-DATA K<>+0x74(SB)/4, $0xd5a79147
-DATA K<>+0x78(SB)/4, $0x06ca6351
-DATA K<>+0x7c(SB)/4, $0x14292967
-DATA K<>+0x80(SB)/4, $0x27b70a85
-DATA K<>+0x84(SB)/4, $0x2e1b2138
-DATA K<>+0x88(SB)/4, $0x4d2c6dfc
-DATA K<>+0x8c(SB)/4, $0x53380d13
-DATA K<>+0x90(SB)/4, $0x650a7354
-DATA K<>+0x94(SB)/4, $0x766a0abb
-DATA K<>+0x98(SB)/4, $0x81c2c92e
-DATA K<>+0x9c(SB)/4, $0x92722c85
-DATA K<>+0xa0(SB)/4, $0xa2bfe8a1
-DATA K<>+0xa4(SB)/4, $0xa81a664b
-DATA K<>+0xa8(SB)/4, $0xc24b8b70
-DATA K<>+0xac(SB)/4, $0xc76c51a3
-DATA K<>+0xb0(SB)/4, $0xd192e819
-DATA K<>+0xb4(SB)/4, $0xd6990624
-DATA K<>+0xb8(SB)/4, $0xf40e3585
-DATA K<>+0xbc(SB)/4, $0x106aa070
-DATA K<>+0xc0(SB)/4, $0x19a4c116
-DATA K<>+0xc4(SB)/4, $0x1e376c08
-DATA K<>+0xc8(SB)/4, $0x2748774c
-DATA K<>+0xcc(SB)/4, $0x34b0bcb5
-DATA K<>+0xd0(SB)/4, $0x391c0cb3
-DATA K<>+0xd4(SB)/4, $0x4ed8aa4a
-DATA K<>+0xd8(SB)/4, $0x5b9cca4f
-DATA K<>+0xdc(SB)/4, $0x682e6ff3
-DATA K<>+0xe0(SB)/4, $0x748f82ee
-DATA K<>+0xe4(SB)/4, $0x78a5636f
-DATA K<>+0xe8(SB)/4, $0x84c87814
-DATA K<>+0xec(SB)/4, $0x8cc70208
-DATA K<>+0xf0(SB)/4, $0x90befffa
-DATA K<>+0xf4(SB)/4, $0xa4506ceb
-DATA K<>+0xf8(SB)/4, $0xbef9a3f7
-DATA K<>+0xfc(SB)/4, $0xc67178f2
-GLOBL K<>(SB), RODATA|NOPTR, $256
-
-DATA SHUF_MASK<>+0x00(SB)/8, $0x0405060700010203
-DATA SHUF_MASK<>+0x08(SB)/8, $0x0c0d0e0f08090a0b
-GLOBL SHUF_MASK<>(SB), RODATA|NOPTR, $16
-
-// Register Usage
-// BX  base address of constant table (constant)
-// DX  hash_state (constant)
-// SI  hash_data.data
-// DI  hash_data.data + hash_data.length - 64 (constant)
-// X0  scratch
-// X1  scratch
-// X2  working hash state // ABEF
-// X3  working hash state // CDGH
-// X4  first 16 bytes of block
-// X5  second 16 bytes of block
-// X6  third 16 bytes of block
-// X7  fourth 16 bytes of block
-// X12 saved hash state // ABEF
-// X13 saved hash state // CDGH
-// X15 data shuffle mask (constant)
-
-TEXT ·blockIntelSha(SB), NOSPLIT, $0-32
-	MOVQ      h+0(FP), DX
-	MOVQ      message_base+8(FP), SI
-	MOVQ      message_len+16(FP), DI
-	LEAQ      -64(SI)(DI*1), DI
-	MOVOU     (DX), X2
-	MOVOU     16(DX), X1
-	MOVO      X2, X3
-	PUNPCKLLQ X1, X2
-	PUNPCKHLQ X1, X3
-	PSHUFD    $0x27, X2, X2
-	PSHUFD    $0x27, X3, X3
-	MOVO      SHUF_MASK<>(SB), X15
-	LEAQ      K<>(SB), BX
-
-	JMP TEST
-
-LOOP:
-	MOVO X2, X12
-	MOVO X3, X13
-
-	// load block and shuffle
-	MOVOU  (SI), X4
-	MOVOU  16(SI), X5
-	MOVOU  32(SI), X6
-	MOVOU  48(SI), X7
-	PSHUFB X15, X4
-	PSHUFB X15, X5
-	PSHUFB X15, X6
-	PSHUFB X15, X7
-
-#define ROUND456 \
-	PADDL  X5, X0                    \
-	LONG   $0xdacb380f               \ // SHA256RNDS2 XMM3, XMM2
-	MOVO   X5, X1                    \
-	LONG   $0x0f3a0f66; WORD $0x04cc \ // PALIGNR XMM1, XMM4, 4
-	PADDL  X1, X6                    \
-	LONG   $0xf5cd380f               \ // SHA256MSG2 XMM6, XMM5
-	PSHUFD $0x4e, X0, X0             \
-	LONG   $0xd3cb380f               \ // SHA256RNDS2 XMM2, XMM3
-	LONG   $0xe5cc380f               // SHA256MSG1 XMM4, XMM5
-
-#define ROUND567 \
-	PADDL  X6, X0                    \
-	LONG   $0xdacb380f               \ // SHA256RNDS2 XMM3, XMM2
-	MOVO   X6, X1                    \
-	LONG   $0x0f3a0f66; WORD $0x04cd \ // PALIGNR XMM1, XMM5, 4
-	PADDL  X1, X7                    \
-	LONG   $0xfecd380f               \ // SHA256MSG2 XMM7, XMM6
-	PSHUFD $0x4e, X0, X0             \
-	LONG   $0xd3cb380f               \ // SHA256RNDS2 XMM2, XMM3
-	LONG   $0xeecc380f               // SHA256MSG1 XMM5, XMM6
-
-#define ROUND674 \
-	PADDL  X7, X0                    \
-	LONG   $0xdacb380f               \ // SHA256RNDS2 XMM3, XMM2
-	MOVO   X7, X1                    \
-	LONG   $0x0f3a0f66; WORD $0x04ce \ // PALIGNR XMM1, XMM6, 4
-	PADDL  X1, X4                    \
-	LONG   $0xe7cd380f               \ // SHA256MSG2 XMM4, XMM7
-	PSHUFD $0x4e, X0, X0             \
-	LONG   $0xd3cb380f               \ // SHA256RNDS2 XMM2, XMM3
-	LONG   $0xf7cc380f               // SHA256MSG1 XMM6, XMM7
-
-#define ROUND745 \
-	PADDL  X4, X0                    \
-	LONG   $0xdacb380f               \ // SHA256RNDS2 XMM3, XMM2
-	MOVO   X4, X1                    \
-	LONG   $0x0f3a0f66; WORD $0x04cf \ // PALIGNR XMM1, XMM7, 4
-	PADDL  X1, X5                    \
-	LONG   $0xeccd380f               \ // SHA256MSG2 XMM5, XMM4
-	PSHUFD $0x4e, X0, X0             \
-	LONG   $0xd3cb380f               \ // SHA256RNDS2 XMM2, XMM3
-	LONG   $0xfccc380f               // SHA256MSG1 XMM7, XMM4
-
-	// rounds 0-3
-	MOVO   (BX), X0
-	PADDL  X4, X0
-	LONG   $0xdacb380f   // SHA256RNDS2 XMM3, XMM2
-	PSHUFD $0x4e, X0, X0
-	LONG   $0xd3cb380f   // SHA256RNDS2 XMM2, XMM3
-
-	// rounds 4-7
-	MOVO   1*16(BX), X0
-	PADDL  X5, X0
-	LONG   $0xdacb380f   // SHA256RNDS2 XMM3, XMM2
-	PSHUFD $0x4e, X0, X0
-	LONG   $0xd3cb380f   // SHA256RNDS2 XMM2, XMM3
-	LONG   $0xe5cc380f   // SHA256MSG1 XMM4, XMM5
-
-	// rounds 8-11
-	MOVO   2*16(BX), X0
-	PADDL  X6, X0
-	LONG   $0xdacb380f   // SHA256RNDS2 XMM3, XMM2
-	PSHUFD $0x4e, X0, X0
-	LONG   $0xd3cb380f   // SHA256RNDS2 XMM2, XMM3
-	LONG   $0xeecc380f   // SHA256MSG1 XMM5, XMM6
-
-	MOVO 3*16(BX), X0; ROUND674  // rounds 12-15
-	MOVO 4*16(BX), X0; ROUND745  // rounds 16-19
-	MOVO 5*16(BX), X0; ROUND456  // rounds 20-23
-	MOVO 6*16(BX), X0; ROUND567  // rounds 24-27
-	MOVO 7*16(BX), X0; ROUND674  // rounds 28-31
-	MOVO 8*16(BX), X0; ROUND745  // rounds 32-35
-	MOVO 9*16(BX), X0; ROUND456  // rounds 36-39
-	MOVO 10*16(BX), X0; ROUND567 // rounds 40-43
-	MOVO 11*16(BX), X0; ROUND674 // rounds 44-47
-	MOVO 12*16(BX), X0; ROUND745 // rounds 48-51
-
-	// rounds 52-55
-	MOVO   13*16(BX), X0
-	PADDL  X5, X0
-	LONG   $0xdacb380f               // SHA256RNDS2 XMM3, XMM2
-	MOVO   X5, X1
-	LONG   $0x0f3a0f66; WORD $0x04cc // PALIGNR XMM1, XMM4, 4
-	PADDL  X1, X6
-	LONG   $0xf5cd380f               // SHA256MSG2 XMM6, XMM5
-	PSHUFD $0x4e, X0, X0
-	LONG   $0xd3cb380f               // SHA256RNDS2 XMM2, XMM3
-
-	// rounds 56-59
-	MOVO   14*16(BX), X0
-	PADDL  X6, X0
-	LONG   $0xdacb380f               // SHA256RNDS2 XMM3, XMM2
-	MOVO   X6, X1
-	LONG   $0x0f3a0f66; WORD $0x04cd // PALIGNR XMM1, XMM5, 4
-	PADDL  X1, X7
-	LONG   $0xfecd380f               // SHA256MSG2 XMM7, XMM6
-	PSHUFD $0x4e, X0, X0
-	LONG   $0xd3cb380f               // SHA256RNDS2 XMM2, XMM3
-
-	// rounds 60-63
-	MOVO   15*16(BX), X0
-	PADDL  X7, X0
-	LONG   $0xdacb380f   // SHA256RNDS2 XMM3, XMM2
-	PSHUFD $0x4e, X0, X0
-	LONG   $0xd3cb380f   // SHA256RNDS2 XMM2, XMM3
-
-	PADDL X12, X2
-	PADDL X13, X3
-
-	ADDQ $64, SI
-
-TEST:
-	CMPQ SI, DI
-	JBE  LOOP
-
-	PSHUFD $0x4e, X3, X0
-	LONG   $0x0e3a0f66; WORD $0xf0c2 // PBLENDW XMM0, XMM2, 0xf0
-	PSHUFD $0x4e, X2, X1
-	LONG   $0x0e3a0f66; WORD $0x0fcb // PBLENDW XMM1, XMM3, 0x0f
-	PSHUFD $0x1b, X0, X0
-	PSHUFD $0x1b, X1, X1
-
-	MOVOU X0, (DX)
-	MOVOU X1, 16(DX)
-
-	RET
diff --git a/vendor/github.com/minio/sha256-simd/sha256block_arm64.go b/vendor/github.com/minio/sha256-simd/sha256block_arm64.go
deleted file mode 100644
index d4369e24..00000000
--- a/vendor/github.com/minio/sha256-simd/sha256block_arm64.go
+++ /dev/null
@@ -1,37 +0,0 @@
-//go:build !noasm && !appengine && gc
-// +build !noasm,!appengine,gc
-
-/*
- * Minio Cloud Storage, (C) 2016 Minio, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package sha256
-
-func blockIntelShaGo(dig *digest, p []byte) {
-	panic("blockIntelShaGo called unexpectedly")
-}
-
-//go:noescape
-func blockArmSha2(h []uint32, message []uint8)
-
-func blockArmSha2Go(dig *digest, p []byte) {
-
-	h := []uint32{dig.h[0], dig.h[1], dig.h[2], dig.h[3], dig.h[4], dig.h[5], dig.h[6], dig.h[7]}
-
-	blockArmSha2(h[:], p[:])
-
-	dig.h[0], dig.h[1], dig.h[2], dig.h[3], dig.h[4], dig.h[5], dig.h[6], dig.h[7] = h[0], h[1], h[2], h[3], h[4],
-		h[5], h[6], h[7]
-}
diff --git a/vendor/github.com/minio/sha256-simd/sha256block_arm64.s b/vendor/github.com/minio/sha256-simd/sha256block_arm64.s
deleted file mode 100644
index 7ab88b16..00000000
--- a/vendor/github.com/minio/sha256-simd/sha256block_arm64.s
+++ /dev/null
@@ -1,192 +0,0 @@
-//+build !noasm,!appengine,gc
-
-// ARM64 version of SHA256
-
-//
-// Minio Cloud Storage, (C) 2016 Minio, Inc.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-//
-
-//
-// Based on implementation as found in https://github.com/jocover/sha256-armv8
-//
-// Use github.com/minio/asm2plan9s on this file to assemble ARM instructions to
-// their Plan9 equivalents
-//
-
-TEXT ·blockArmSha2(SB), 7, $0
-	MOVD h+0(FP), R0
-	MOVD message+24(FP), R1
-	MOVD message_len+32(FP), R2 // length of message
-	SUBS $64, R2
-	BMI  complete
-
-	// Load constants table pointer
-	MOVD $·constants(SB), R3
-
-	// Cache constants table in registers v16 - v31
-	WORD $0x4cdf2870 // ld1	{v16.4s-v19.4s}, [x3], #64
-	WORD $0x4cdf7800 // ld1	{v0.4s}, [x0], #16
-	WORD $0x4cdf2874 // ld1	{v20.4s-v23.4s}, [x3], #64
-
-	WORD $0x4c407801 // ld1	{v1.4s}, [x0]
-	WORD $0x4cdf2878 // ld1	{v24.4s-v27.4s}, [x3], #64
-	WORD $0xd1004000 // sub	x0, x0, #0x10
-	WORD $0x4cdf287c // ld1	{v28.4s-v31.4s}, [x3], #64
-
-loop:
-	// Main loop
-	WORD $0x4cdf2025 // ld1	{v5.16b-v8.16b}, [x1], #64
-	WORD $0x4ea01c02 // mov	v2.16b, v0.16b
-	WORD $0x4ea11c23 // mov	v3.16b, v1.16b
-	WORD $0x6e2008a5 // rev32	v5.16b, v5.16b
-	WORD $0x6e2008c6 // rev32	v6.16b, v6.16b
-	WORD $0x4eb084a9 // add	v9.4s, v5.4s, v16.4s
-	WORD $0x6e2008e7 // rev32	v7.16b, v7.16b
-	WORD $0x4eb184ca // add	v10.4s, v6.4s, v17.4s
-	WORD $0x4ea21c44 // mov	v4.16b, v2.16b
-	WORD $0x5e094062 // sha256h	q2, q3, v9.4s
-	WORD $0x5e095083 // sha256h2	q3, q4, v9.4s
-	WORD $0x5e2828c5 // sha256su0	v5.4s, v6.4s
-	WORD $0x6e200908 // rev32	v8.16b, v8.16b
-	WORD $0x4eb284e9 // add	v9.4s, v7.4s, v18.4s
-	WORD $0x4ea21c44 // mov	v4.16b, v2.16b
-	WORD $0x5e0a4062 // sha256h	q2, q3, v10.4s
-	WORD $0x5e0a5083 // sha256h2	q3, q4, v10.4s
-	WORD $0x5e2828e6 // sha256su0	v6.4s, v7.4s
-	WORD $0x5e0860e5 // sha256su1	v5.4s, v7.4s, v8.4s
-	WORD $0x4eb3850a // add	v10.4s, v8.4s, v19.4s
-	WORD $0x4ea21c44 // mov	v4.16b, v2.16b
-	WORD $0x5e094062 // sha256h	q2, q3, v9.4s
-	WORD $0x5e095083 // sha256h2	q3, q4, v9.4s
-	WORD $0x5e282907 // sha256su0	v7.4s, v8.4s
-	WORD $0x5e056106 // sha256su1	v6.4s, v8.4s, v5.4s
-	WORD $0x4eb484a9 // add	v9.4s, v5.4s, v20.4s
-	WORD $0x4ea21c44 // mov	v4.16b, v2.16b
-	WORD $0x5e0a4062 // sha256h	q2, q3, v10.4s
-	WORD $0x5e0a5083 // sha256h2	q3, q4, v10.4s
-	WORD $0x5e2828a8 // sha256su0	v8.4s, v5.4s
-	WORD $0x5e0660a7 // sha256su1	v7.4s, v5.4s, v6.4s
-	WORD $0x4eb584ca // add	v10.4s, v6.4s, v21.4s
-	WORD $0x4ea21c44 // mov	v4.16b, v2.16b
-	WORD $0x5e094062 // sha256h	q2, q3, v9.4s
-	WORD $0x5e095083 // sha256h2	q3, q4, v9.4s
-	WORD $0x5e2828c5 // sha256su0	v5.4s, v6.4s
-	WORD $0x5e0760c8 // sha256su1	v8.4s, v6.4s, v7.4s
-	WORD $0x4eb684e9 // add	v9.4s, v7.4s, v22.4s
-	WORD $0x4ea21c44 // mov	v4.16b, v2.16b
-	WORD $0x5e0a4062 // sha256h	q2, q3, v10.4s
-	WORD $0x5e0a5083 // sha256h2	q3, q4, v10.4s
-	WORD $0x5e2828e6 // sha256su0	v6.4s, v7.4s
-	WORD $0x5e0860e5 // sha256su1	v5.4s, v7.4s, v8.4s
-	WORD $0x4eb7850a // add	v10.4s, v8.4s, v23.4s
-	WORD $0x4ea21c44 // mov	v4.16b, v2.16b
-	WORD $0x5e094062 // sha256h	q2, q3, v9.4s
-	WORD $0x5e095083 // sha256h2	q3, q4, v9.4s
-	WORD $0x5e282907 // sha256su0	v7.4s, v8.4s
-	WORD $0x5e056106 // sha256su1	v6.4s, v8.4s, v5.4s
-	WORD $0x4eb884a9 // add	v9.4s, v5.4s, v24.4s
-	WORD $0x4ea21c44 // mov	v4.16b, v2.16b
-	WORD $0x5e0a4062 // sha256h	q2, q3, v10.4s
-	WORD $0x5e0a5083 // sha256h2	q3, q4, v10.4s
-	WORD $0x5e2828a8 // sha256su0	v8.4s, v5.4s
-	WORD $0x5e0660a7 // sha256su1	v7.4s, v5.4s, v6.4s
-	WORD $0x4eb984ca // add	v10.4s, v6.4s, v25.4s
-	WORD $0x4ea21c44 // mov	v4.16b, v2.16b
-	WORD $0x5e094062 // sha256h	q2, q3, v9.4s
-	WORD $0x5e095083 // sha256h2	q3, q4, v9.4s
-	WORD $0x5e2828c5 // sha256su0	v5.4s, v6.4s
-	WORD $0x5e0760c8 // sha256su1	v8.4s, v6.4s, v7.4s
-	WORD $0x4eba84e9 // add	v9.4s, v7.4s, v26.4s
-	WORD $0x4ea21c44 // mov	v4.16b, v2.16b
-	WORD $0x5e0a4062 // sha256h	q2, q3, v10.4s
-	WORD $0x5e0a5083 // sha256h2	q3, q4, v10.4s
-	WORD $0x5e2828e6 // sha256su0	v6.4s, v7.4s
-	WORD $0x5e0860e5 // sha256su1	v5.4s, v7.4s, v8.4s
-	WORD $0x4ebb850a // add	v10.4s, v8.4s, v27.4s
-	WORD $0x4ea21c44 // mov	v4.16b, v2.16b
-	WORD $0x5e094062 // sha256h	q2, q3, v9.4s
-	WORD $0x5e095083 // sha256h2	q3, q4, v9.4s
-	WORD $0x5e282907 // sha256su0	v7.4s, v8.4s
-	WORD $0x5e056106 // sha256su1	v6.4s, v8.4s, v5.4s
-	WORD $0x4ebc84a9 // add	v9.4s, v5.4s, v28.4s
-	WORD $0x4ea21c44 // mov	v4.16b, v2.16b
-	WORD $0x5e0a4062 // sha256h	q2, q3, v10.4s
-	WORD $0x5e0a5083 // sha256h2	q3, q4, v10.4s
-	WORD $0x5e2828a8 // sha256su0	v8.4s, v5.4s
-	WORD $0x5e0660a7 // sha256su1	v7.4s, v5.4s, v6.4s
-	WORD $0x4ebd84ca // add	v10.4s, v6.4s, v29.4s
-	WORD $0x4ea21c44 // mov	v4.16b, v2.16b
-	WORD $0x5e094062 // sha256h	q2, q3, v9.4s
-	WORD $0x5e095083 // sha256h2	q3, q4, v9.4s
-	WORD $0x5e0760c8 // sha256su1	v8.4s, v6.4s, v7.4s
-	WORD $0x4ebe84e9 // add	v9.4s, v7.4s, v30.4s
-	WORD $0x4ea21c44 // mov	v4.16b, v2.16b
-	WORD $0x5e0a4062 // sha256h	q2, q3, v10.4s
-	WORD $0x5e0a5083 // sha256h2	q3, q4, v10.4s
-	WORD $0x4ebf850a // add	v10.4s, v8.4s, v31.4s
-	WORD $0x4ea21c44 // mov	v4.16b, v2.16b
-	WORD $0x5e094062 // sha256h	q2, q3, v9.4s
-	WORD $0x5e095083 // sha256h2	q3, q4, v9.4s
-	WORD $0x4ea21c44 // mov	v4.16b, v2.16b
-	WORD $0x5e0a4062 // sha256h	q2, q3, v10.4s
-	WORD $0x5e0a5083 // sha256h2	q3, q4, v10.4s
-	WORD $0x4ea38421 // add	v1.4s, v1.4s, v3.4s
-	WORD $0x4ea28400 // add	v0.4s, v0.4s, v2.4s
-
-	SUBS $64, R2
-	BPL  loop
-
-	// Store result
-	WORD $0x4c00a800 // st1	{v0.4s, v1.4s}, [x0]
-
-complete:
-	RET
-
-// Constants table
-DATA ·constants+0x0(SB)/8, $0x71374491428a2f98
-DATA ·constants+0x8(SB)/8, $0xe9b5dba5b5c0fbcf
-DATA ·constants+0x10(SB)/8, $0x59f111f13956c25b
-DATA ·constants+0x18(SB)/8, $0xab1c5ed5923f82a4
-DATA ·constants+0x20(SB)/8, $0x12835b01d807aa98
-DATA ·constants+0x28(SB)/8, $0x550c7dc3243185be
-DATA ·constants+0x30(SB)/8, $0x80deb1fe72be5d74
-DATA ·constants+0x38(SB)/8, $0xc19bf1749bdc06a7
-DATA ·constants+0x40(SB)/8, $0xefbe4786e49b69c1
-DATA ·constants+0x48(SB)/8, $0x240ca1cc0fc19dc6
-DATA ·constants+0x50(SB)/8, $0x4a7484aa2de92c6f
-DATA ·constants+0x58(SB)/8, $0x76f988da5cb0a9dc
-DATA ·constants+0x60(SB)/8, $0xa831c66d983e5152
-DATA ·constants+0x68(SB)/8, $0xbf597fc7b00327c8
-DATA ·constants+0x70(SB)/8, $0xd5a79147c6e00bf3
-DATA ·constants+0x78(SB)/8, $0x1429296706ca6351
-DATA ·constants+0x80(SB)/8, $0x2e1b213827b70a85
-DATA ·constants+0x88(SB)/8, $0x53380d134d2c6dfc
-DATA ·constants+0x90(SB)/8, $0x766a0abb650a7354
-DATA ·constants+0x98(SB)/8, $0x92722c8581c2c92e
-DATA ·constants+0xa0(SB)/8, $0xa81a664ba2bfe8a1
-DATA ·constants+0xa8(SB)/8, $0xc76c51a3c24b8b70
-DATA ·constants+0xb0(SB)/8, $0xd6990624d192e819
-DATA ·constants+0xb8(SB)/8, $0x106aa070f40e3585
-DATA ·constants+0xc0(SB)/8, $0x1e376c0819a4c116
-DATA ·constants+0xc8(SB)/8, $0x34b0bcb52748774c
-DATA ·constants+0xd0(SB)/8, $0x4ed8aa4a391c0cb3
-DATA ·constants+0xd8(SB)/8, $0x682e6ff35b9cca4f
-DATA ·constants+0xe0(SB)/8, $0x78a5636f748f82ee
-DATA ·constants+0xe8(SB)/8, $0x8cc7020884c87814
-DATA ·constants+0xf0(SB)/8, $0xa4506ceb90befffa
-DATA ·constants+0xf8(SB)/8, $0xc67178f2bef9a3f7
-
-GLOBL ·constants(SB), 8, $256
-
diff --git a/vendor/github.com/minio/sha256-simd/sha256block_other.go b/vendor/github.com/minio/sha256-simd/sha256block_other.go
deleted file mode 100644
index 94d7eb0b..00000000
--- a/vendor/github.com/minio/sha256-simd/sha256block_other.go
+++ /dev/null
@@ -1,29 +0,0 @@
-//go:build appengine || noasm || (!amd64 && !arm64) || !gc
-// +build appengine noasm !amd64,!arm64 !gc
-
-/*
- * Minio Cloud Storage, (C) 2019 Minio, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package sha256
-
-func blockIntelShaGo(dig *digest, p []byte) {
-	panic("blockIntelShaGo called unexpectedly")
-
-}
-
-func blockArmSha2Go(dig *digest, p []byte) {
-	panic("blockArmSha2Go called unexpectedly")
-}
diff --git a/vendor/github.com/minio/sha256-simd/test-architectures.sh b/vendor/github.com/minio/sha256-simd/test-architectures.sh
deleted file mode 100644
index 50150eaa..00000000
--- a/vendor/github.com/minio/sha256-simd/test-architectures.sh
+++ /dev/null
@@ -1,15 +0,0 @@
-#!/bin/sh
-
-set -e
-
-go tool dist list | while IFS=/ read os arch; do
-    echo "Checking $os/$arch..."
-    echo " normal"
-    GOARCH=$arch GOOS=$os go build -o /dev/null ./...
-    echo " noasm"
-    GOARCH=$arch GOOS=$os go build -tags noasm -o /dev/null ./...
-    echo " appengine"
-    GOARCH=$arch GOOS=$os go build -tags appengine -o /dev/null ./...
-    echo " noasm,appengine"
-    GOARCH=$arch GOOS=$os go build -tags 'appengine noasm' -o /dev/null ./...
-done
diff --git a/vendor/github.com/netobserv/flowlogs-pipeline/pkg/api/encode_prom.go b/vendor/github.com/netobserv/flowlogs-pipeline/pkg/api/encode_prom.go
index 97222b9f..6780a43a 100644
--- a/vendor/github.com/netobserv/flowlogs-pipeline/pkg/api/encode_prom.go
+++ b/vendor/github.com/netobserv/flowlogs-pipeline/pkg/api/encode_prom.go
@@ -53,6 +53,7 @@ type MetricsItem struct {
 	ValueKey   string                    `yaml:"valueKey" json:"valueKey" doc:"entry key from which to resolve metric value"`
 	Labels     []string                  `yaml:"labels" json:"labels" doc:"labels to be associated with the metric"`
 	Remap      map[string]string         `yaml:"remap" json:"remap" doc:"optional remapping of labels"`
+	Flatten    []string                  `yaml:"flatten" json:"flatten" doc:"list fields to be flattened"`
 	Buckets    []float64                 `yaml:"buckets" json:"buckets" doc:"histogram buckets"`
 	ValueScale float64                   `yaml:"valueScale,omitempty" json:"valueScale,omitempty" doc:"scale factor of the value (MetricVal := FlowVal / Scale)"`
 }
diff --git a/vendor/github.com/netobserv/flowlogs-pipeline/pkg/api/encode_s3.go b/vendor/github.com/netobserv/flowlogs-pipeline/pkg/api/encode_s3.go
index 346bbb1a..71cc6d6f 100644
--- a/vendor/github.com/netobserv/flowlogs-pipeline/pkg/api/encode_s3.go
+++ b/vendor/github.com/netobserv/flowlogs-pipeline/pkg/api/encode_s3.go
@@ -28,5 +28,5 @@ type EncodeS3 struct {
 	Secure                 bool                   `yaml:"secure,omitempty" json:"secure,omitempty" doc:"true for https, false for http (default: false)"`
 	ObjectHeaderParameters map[string]interface{} `yaml:"objectHeaderParameters,omitempty" json:"objectHeaderParameters,omitempty" doc:"parameters to include in object header (key/value pairs)"`
 	// TBD: (TLS?) security parameters
-	//TLS                    *ClientTLS             `yaml:"tls" json:"tls" doc:"TLS client configuration (optional)"`
+	// TLS                    *ClientTLS             `yaml:"tls" json:"tls" doc:"TLS client configuration (optional)"`
 }
diff --git a/vendor/github.com/netobserv/flowlogs-pipeline/pkg/api/transform_filter.go b/vendor/github.com/netobserv/flowlogs-pipeline/pkg/api/transform_filter.go
index 5cc84a75..e41d7cd9 100644
--- a/vendor/github.com/netobserv/flowlogs-pipeline/pkg/api/transform_filter.go
+++ b/vendor/github.com/netobserv/flowlogs-pipeline/pkg/api/transform_filter.go
@@ -37,6 +37,7 @@ const (
 	RemoveEntryIfEqual       TransformFilterEnum = "remove_entry_if_equal"        // removes the entry if the field value equals specified value
 	RemoveEntryIfNotEqual    TransformFilterEnum = "remove_entry_if_not_equal"    // removes the entry if the field value does not equal specified value
 	RemoveEntryAllSatisfied  TransformFilterEnum = "remove_entry_all_satisfied"   // removes the entry if all of the defined rules are satisfied
+	KeepEntryAllSatisfied    TransformFilterEnum = "keep_entry_all_satisfied"     // keeps the entry if the set of rules are all satisfied
 	AddField                 TransformFilterEnum = "add_field"                    // adds (input) field to the entry; overrides previous value if present (key=input, value=value)
 	AddFieldIfDoesntExist    TransformFilterEnum = "add_field_if_doesnt_exist"    // adds a field to the entry if the field does not exist
 	AddFieldIf               TransformFilterEnum = "add_field_if"                 // add output field set to assignee if input field satisfies criteria from parameters field
@@ -55,11 +56,24 @@ const (
 	RemoveEntryIfNotEqualD    TransformFilterRemoveEntryEnum = "remove_entry_if_not_equal"    // removes the entry if the field value does not equal specified value
 )
 
+type TransformFilterKeepEntryEnum string
+
+const (
+	KeepEntryIfExists        TransformFilterKeepEntryEnum = "keep_entry_if_exists"          // keeps the entry if the field exists
+	KeepEntryIfDoesntExist   TransformFilterKeepEntryEnum = "keep_entry_if_doesnt_exist"    // keeps the entry if the field does not exist
+	KeepEntryIfEqual         TransformFilterKeepEntryEnum = "keep_entry_if_equal"           // keeps the entry if the field value equals specified value
+	KeepEntryIfNotEqual      TransformFilterKeepEntryEnum = "keep_entry_if_not_equal"       // keeps the entry if the field value does not equal specified value
+	KeepEntryIfRegexMatch    TransformFilterKeepEntryEnum = "keep_entry_if_regex_match"     // keeps the entry if the field value matches the specified regex
+	KeepEntryIfNotRegexMatch TransformFilterKeepEntryEnum = "keep_entry_if_not_regex_match" // keeps the entry if the field value does not match the specified regex
+)
+
 type TransformFilterRule struct {
 	Type                    TransformFilterEnum              `yaml:"type,omitempty" json:"type,omitempty" doc:"(enum) one of the following:"`
 	RemoveField             *TransformFilterGenericRule      `yaml:"removeField,omitempty" json:"removeField,omitempty" doc:"configuration for remove_field rule"`
 	RemoveEntry             *TransformFilterGenericRule      `yaml:"removeEntry,omitempty" json:"removeEntry,omitempty" doc:"configuration for remove_entry_* rules"`
 	RemoveEntryAllSatisfied []*RemoveEntryRule               `yaml:"removeEntryAllSatisfied,omitempty" json:"removeEntryAllSatisfied,omitempty" doc:"configuration for remove_entry_all_satisfied rule"`
+	KeepEntryAllSatisfied   []*KeepEntryRule                 `yaml:"keepEntryAllSatisfied,omitempty" json:"keepEntryAllSatisfied,omitempty" doc:"configuration for keep_entry rule"`
+	KeepEntrySampling       uint16                           `yaml:"keepEntrySampling,omitempty" json:"keepEntrySampling,omitempty" doc:"sampling value for keep_entry type: 1 flow on <sampling> is kept"`
 	AddField                *TransformFilterGenericRule      `yaml:"addField,omitempty" json:"addField,omitempty" doc:"configuration for add_field rule"`
 	AddFieldIfDoesntExist   *TransformFilterGenericRule      `yaml:"addFieldIfDoesntExist,omitempty" json:"addFieldIfDoesntExist,omitempty" doc:"configuration for add_field_if_doesnt_exist rule"`
 	AddFieldIf              *TransformFilterRuleWithAssignee `yaml:"addFieldIf,omitempty" json:"addFieldIf,omitempty" doc:"configuration for add_field_if rule"`
@@ -79,6 +93,9 @@ func (r *TransformFilterRule) preprocess() {
 	for i := range r.RemoveEntryAllSatisfied {
 		r.RemoveEntryAllSatisfied[i].RemoveEntry.preprocess()
 	}
+	for i := range r.KeepEntryAllSatisfied {
+		r.KeepEntryAllSatisfied[i].KeepEntry.preprocess()
+	}
 	for i := range r.ConditionalSampling {
 		r.ConditionalSampling[i].preprocess()
 	}
@@ -110,6 +127,11 @@ type RemoveEntryRule struct {
 	RemoveEntry *TransformFilterGenericRule    `yaml:"removeEntry,omitempty" json:"removeEntry,omitempty" doc:"configuration for remove_entry_* rules"`
 }
 
+type KeepEntryRule struct {
+	Type      TransformFilterKeepEntryEnum `yaml:"type,omitempty" json:"type,omitempty" doc:"(enum) one of the following:"`
+	KeepEntry *TransformFilterGenericRule  `yaml:"keepEntry,omitempty" json:"keepEntry,omitempty" doc:"configuration for keep_entry_* rules"`
+}
+
 type SamplingCondition struct {
 	Value uint16             `yaml:"value,omitempty" json:"value,omitempty" doc:"sampling value: 1 flow on <sampling> is kept"`
 	Rules []*RemoveEntryRule `yaml:"rules,omitempty" json:"rules,omitempty" doc:"rules to be satisfied for this sampling configuration"`
diff --git a/vendor/github.com/netobserv/flowlogs-pipeline/pkg/api/transform_network.go b/vendor/github.com/netobserv/flowlogs-pipeline/pkg/api/transform_network.go
index baf92223..a83e385d 100644
--- a/vendor/github.com/netobserv/flowlogs-pipeline/pkg/api/transform_network.go
+++ b/vendor/github.com/netobserv/flowlogs-pipeline/pkg/api/transform_network.go
@@ -59,6 +59,7 @@ const (
 	NetworkAddKubernetesInfra   TransformNetworkOperationEnum = "add_kubernetes_infra"  // add output kubernetes isInfra field from input
 	NetworkReinterpretDirection TransformNetworkOperationEnum = "reinterpret_direction" // reinterpret flow direction at the node level (instead of net interface), to ease the deduplication process
 	NetworkAddSubnetLabel       TransformNetworkOperationEnum = "add_subnet_label"      // categorize IPs based on known subnets configuration
+	NetworkDecodeTCPFlags       TransformNetworkOperationEnum = "decode_tcp_flags"      // decode bitwise TCP flags into a string
 )
 
 type NetworkTransformRule struct {
@@ -69,6 +70,7 @@ type NetworkTransformRule struct {
 	AddLocation     *NetworkGenericRule           `yaml:"add_location,omitempty" json:"add_location,omitempty" doc:"Add location rule configuration"`
 	AddSubnetLabel  *NetworkAddSubnetLabelRule    `yaml:"add_subnet_label,omitempty" json:"add_subnet_label,omitempty" doc:"Add subnet label rule configuration"`
 	AddService      *NetworkAddServiceRule        `yaml:"add_service,omitempty" json:"add_service,omitempty" doc:"Add service rule configuration"`
+	DecodeTCPFlags  *NetworkGenericRule           `yaml:"decode_tcp_flags,omitempty" json:"decode_tcp_flags,omitempty" doc:"Decode bitwise TCP flags into a string"`
 }
 
 type K8sInfraRule struct {
diff --git a/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/encode/encode_prom.go b/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/encode/encode_prom.go
index f90d0be5..dbf97499 100644
--- a/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/encode/encode_prom.go
+++ b/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/encode/encode_prom.go
@@ -25,6 +25,7 @@ import (
 	"github.com/netobserv/flowlogs-pipeline/pkg/api"
 	"github.com/netobserv/flowlogs-pipeline/pkg/config"
 	"github.com/netobserv/flowlogs-pipeline/pkg/operational"
+	"github.com/netobserv/flowlogs-pipeline/pkg/pipeline/encode/metrics"
 	promserver "github.com/netobserv/flowlogs-pipeline/pkg/prometheus"
 	"github.com/prometheus/client_golang/prometheus"
 	"github.com/sirupsen/logrus"
@@ -114,25 +115,25 @@ func (e *EncodeProm) Cleanup(cleanupFunc interface{}) {
 	cleanupFunc.(func())()
 }
 
-func (e *EncodeProm) addCounter(fullMetricName string, mInfo *MetricInfo) prometheus.Collector {
+func (e *EncodeProm) addCounter(fullMetricName string, mInfo *metrics.Preprocessed) prometheus.Collector {
 	counter := prometheus.NewCounterVec(prometheus.CounterOpts{Name: fullMetricName, Help: ""}, mInfo.TargetLabels())
 	e.metricCommon.AddCounter(fullMetricName, counter, mInfo)
 	return counter
 }
 
-func (e *EncodeProm) addGauge(fullMetricName string, mInfo *MetricInfo) prometheus.Collector {
+func (e *EncodeProm) addGauge(fullMetricName string, mInfo *metrics.Preprocessed) prometheus.Collector {
 	gauge := prometheus.NewGaugeVec(prometheus.GaugeOpts{Name: fullMetricName, Help: ""}, mInfo.TargetLabels())
 	e.metricCommon.AddGauge(fullMetricName, gauge, mInfo)
 	return gauge
 }
 
-func (e *EncodeProm) addHistogram(fullMetricName string, mInfo *MetricInfo) prometheus.Collector {
+func (e *EncodeProm) addHistogram(fullMetricName string, mInfo *metrics.Preprocessed) prometheus.Collector {
 	histogram := prometheus.NewHistogramVec(prometheus.HistogramOpts{Name: fullMetricName, Help: ""}, mInfo.TargetLabels())
 	e.metricCommon.AddHist(fullMetricName, histogram, mInfo)
 	return histogram
 }
 
-func (e *EncodeProm) addAgghistogram(fullMetricName string, mInfo *MetricInfo) prometheus.Collector {
+func (e *EncodeProm) addAgghistogram(fullMetricName string, mInfo *metrics.Preprocessed) prometheus.Collector {
 	agghistogram := prometheus.NewHistogramVec(prometheus.HistogramOpts{Name: fullMetricName, Help: ""}, mInfo.TargetLabels())
 	e.metricCommon.AddAggHist(fullMetricName, agghistogram, mInfo)
 	return agghistogram
@@ -176,10 +177,10 @@ func (e *EncodeProm) cleanDeletedMetrics(newCfg api.PromEncode) {
 }
 
 // returns true if a registry restart is needed
-func (e *EncodeProm) checkMetricUpdate(prefix string, apiItem *api.MetricsItem, store map[string]mInfoStruct, createMetric func(string, *MetricInfo) prometheus.Collector) bool {
+func (e *EncodeProm) checkMetricUpdate(prefix string, apiItem *api.MetricsItem, store map[string]mInfoStruct, createMetric func(string, *metrics.Preprocessed) prometheus.Collector) bool {
 	fullMetricName := prefix + apiItem.Name
 	plog.Debugf("Checking metric: %s", fullMetricName)
-	mInfo := CreateMetricInfo(apiItem)
+	mInfo := metrics.Preprocess(apiItem)
 	if oldMetric, ok := store[fullMetricName]; ok {
 		if !reflect.DeepEqual(mInfo.TargetLabels(), oldMetric.info.TargetLabels()) {
 			plog.Debug("Changes detected in labels")
@@ -245,7 +246,7 @@ func (e *EncodeProm) checkConfUpdate() {
 			break
 		}
 	default:
-		//Nothing to do
+		// Nothing to do
 		return
 	}
 }
@@ -257,7 +258,7 @@ func (e *EncodeProm) resetRegistry() {
 	for i := range e.cfg.Metrics {
 		mCfg := &e.cfg.Metrics[i]
 		fullMetricName := e.cfg.Prefix + mCfg.Name
-		mInfo := CreateMetricInfo(mCfg)
+		mInfo := metrics.Preprocess(mCfg)
 		plog.Debugf("Create metric: %s, Labels: %v", fullMetricName, mInfo.TargetLabels())
 		var m prometheus.Collector
 		switch mCfg.Type {
diff --git a/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/encode/encode_prom_metric.go b/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/encode/encode_prom_metric.go
deleted file mode 100644
index d5711279..00000000
--- a/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/encode/encode_prom_metric.go
+++ /dev/null
@@ -1,150 +0,0 @@
-package encode
-
-import (
-	"fmt"
-	"regexp"
-	"strings"
-
-	"github.com/netobserv/flowlogs-pipeline/pkg/api"
-	"github.com/netobserv/flowlogs-pipeline/pkg/config"
-)
-
-type Predicate func(flow config.GenericMap) bool
-
-var variableExtractor, _ = regexp.Compile(`\$\(([^\)]+)\)`)
-
-type MetricInfo struct {
-	*api.MetricsItem
-	FilterPredicates []Predicate
-	MappedLabels     []MappedLabel
-}
-
-type MappedLabel struct {
-	Source string
-	Target string
-}
-
-func (m *MetricInfo) TargetLabels() []string {
-	var targetLabels []string
-	for _, l := range m.MappedLabels {
-		targetLabels = append(targetLabels, l.Target)
-	}
-	return targetLabels
-}
-
-func Presence(filter api.MetricsFilter) Predicate {
-	return func(flow config.GenericMap) bool {
-		_, found := flow[filter.Key]
-		return found
-	}
-}
-
-func Absence(filter api.MetricsFilter) Predicate {
-	return func(flow config.GenericMap) bool {
-		_, found := flow[filter.Key]
-		return !found
-	}
-}
-
-func Equal(filter api.MetricsFilter) Predicate {
-	varLookups := extractVarLookups(filter.Value)
-	return func(flow config.GenericMap) bool {
-		if val, found := flow[filter.Key]; found {
-			sVal, ok := val.(string)
-			if !ok {
-				sVal = fmt.Sprint(val)
-			}
-			value := filter.Value
-			if len(varLookups) > 0 {
-				value = injectVars(flow, value, varLookups)
-			}
-			return sVal == value
-		}
-		return false
-	}
-}
-
-func NotEqual(filter api.MetricsFilter) Predicate {
-	pred := Equal(filter)
-	return func(flow config.GenericMap) bool { return !pred(flow) }
-}
-
-func Regex(filter api.MetricsFilter) Predicate {
-	r, _ := regexp.Compile(filter.Value)
-	return func(flow config.GenericMap) bool {
-		if val, found := flow[filter.Key]; found {
-			sVal, ok := val.(string)
-			if !ok {
-				sVal = fmt.Sprint(val)
-			}
-			return r.MatchString(sVal)
-		}
-		return false
-	}
-}
-
-func NotRegex(filter api.MetricsFilter) Predicate {
-	pred := Regex(filter)
-	return func(flow config.GenericMap) bool { return !pred(flow) }
-}
-
-func filterToPredicate(filter api.MetricsFilter) Predicate {
-	switch filter.Type {
-	case api.MetricFilterEqual:
-		return Equal(filter)
-	case api.MetricFilterNotEqual:
-		return NotEqual(filter)
-	case api.MetricFilterPresence:
-		return Presence(filter)
-	case api.MetricFilterAbsence:
-		return Absence(filter)
-	case api.MetricFilterRegex:
-		return Regex(filter)
-	case api.MetricFilterNotRegex:
-		return NotRegex(filter)
-	}
-	// Default = Exact
-	return Equal(filter)
-}
-
-func extractVarLookups(value string) [][]string {
-	// Extract list of variables to lookup
-	// E.g: filter "$(SrcAddr):$(SrcPort)" would return [SrcAddr,SrcPort]
-	if len(value) > 0 {
-		return variableExtractor.FindAllStringSubmatch(value, -1)
-	}
-	return nil
-}
-
-func injectVars(flow config.GenericMap, filterValue string, varLookups [][]string) string {
-	injected := filterValue
-	for _, matchGroup := range varLookups {
-		var value string
-		if rawVal, found := flow[matchGroup[1]]; found {
-			if sVal, ok := rawVal.(string); ok {
-				value = sVal
-			} else {
-				value = fmt.Sprint(rawVal)
-			}
-		}
-		injected = strings.ReplaceAll(injected, matchGroup[0], value)
-	}
-	return injected
-}
-
-func CreateMetricInfo(def *api.MetricsItem) *MetricInfo {
-	mi := MetricInfo{
-		MetricsItem: def,
-	}
-	for _, l := range def.Labels {
-		ml := MappedLabel{Source: l, Target: l}
-		if as := def.Remap[l]; as != "" {
-			ml.Target = as
-		}
-		mi.MappedLabels = append(mi.MappedLabels, ml)
-	}
-	for _, f := range def.Filters {
-		mi.FilterPredicates = append(mi.FilterPredicates, filterToPredicate(f))
-	}
-	return &mi
-}
diff --git a/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/encode/metrics/filtering.go b/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/encode/metrics/filtering.go
new file mode 100644
index 00000000..885d4ae2
--- /dev/null
+++ b/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/encode/metrics/filtering.go
@@ -0,0 +1,28 @@
+package metrics
+
+import "github.com/netobserv/flowlogs-pipeline/pkg/config"
+
+func (p *Preprocessed) ApplyFilters(flow config.GenericMap, flatParts []config.GenericMap) (bool, []config.GenericMap) {
+	filteredParts := flatParts
+	for _, filter := range p.filters {
+		if filter.useFlat {
+			filteredParts = filter.filterFlatParts(filteredParts)
+			if len(filteredParts) == 0 {
+				return false, nil
+			}
+		} else if !filter.predicate(flow) {
+			return false, nil
+		}
+	}
+	return true, filteredParts
+}
+
+func (pf *preprocessedFilter) filterFlatParts(flatParts []config.GenericMap) []config.GenericMap {
+	var filteredParts []config.GenericMap
+	for _, part := range flatParts {
+		if pf.predicate(part) {
+			filteredParts = append(filteredParts, part)
+		}
+	}
+	return filteredParts
+}
diff --git a/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/encode/metrics/flattening.go b/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/encode/metrics/flattening.go
new file mode 100644
index 00000000..e9f27ebb
--- /dev/null
+++ b/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/encode/metrics/flattening.go
@@ -0,0 +1,88 @@
+package metrics
+
+import (
+	"github.com/netobserv/flowlogs-pipeline/pkg/config"
+)
+
+func (p *Preprocessed) GenerateFlatParts(flow config.GenericMap) []config.GenericMap {
+	if len(p.MetricsItem.Flatten) == 0 {
+		return nil
+	}
+	// Goal: with B and C listed in Flatten, expand {A=foo, B=[{B1=x, B2=y},{B1=z}], C=[foo,bar]} into the sub-flows
+	// => {B>B1=x, B>B2=y, C=foo}, {B>B1=z, C=foo}, {B>B1=x, B>B2=y, C=bar}, {B>B1=z, C=bar}
+	var partsPerLabel [][]config.GenericMap
+	for _, fl := range p.MetricsItem.Flatten {
+		if anyVal, ok := flow[fl]; ok {
+			// Intermediate step: build one list of candidate parts per flattened field, e.g.
+			// [{B>B1=x, B>B2=y}, {B>B1=z}], [{C=foo}, {C=bar}]
+			var partsForLabel []config.GenericMap
+			switch v := anyVal.(type) {
+			case []any:
+				prefix := fl + ">"
+				for _, vv := range v {
+					switch vvv := vv.(type) {
+					case config.GenericMap:
+						partsForLabel = append(partsForLabel, flattenNested(prefix, vvv))
+					default:
+						partsForLabel = append(partsForLabel, config.GenericMap{fl: vv})
+					}
+				}
+			case []config.GenericMap:
+				prefix := fl + ">"
+				for _, vv := range v {
+					partsForLabel = append(partsForLabel, flattenNested(prefix, vv))
+				}
+			case []string:
+				for _, vv := range v {
+					partsForLabel = append(partsForLabel, config.GenericMap{fl: vv})
+				}
+			}
+			if len(partsForLabel) > 0 {
+				partsPerLabel = append(partsPerLabel, partsForLabel)
+			}
+		}
+	}
+	return distribute(partsPerLabel)
+}
+
+func distribute(allUnflat [][]config.GenericMap) []config.GenericMap {
+	// Merge one part from each per-field list, for every combination of parts; i.e. turn
+	// [{B>B1=x, B>B2=y}, {B>B1=z}], [{C=foo}, {C=bar}]
+	// into
+	// [{B>B1=x, B>B2=y, C=foo}, {B>B1=z, C=foo}, {B>B1=x, B>B2=y, C=bar}, {B>B1=z, C=bar}]
+	totalCard := 1
+	for _, part := range allUnflat {
+		if len(part) > 1 {
+			totalCard *= len(part)
+		}
+	}
+	ret := make([]config.GenericMap, totalCard)
+	indexes := make([]int, len(allUnflat))
+	for c := range ret {
+		ret[c] = config.GenericMap{}
+		incIndex := false
+		for i, part := range allUnflat {
+			index := indexes[i]
+			for k, v := range part[index] {
+				ret[c][k] = v
+			}
+			if !incIndex {
+				if index+1 == len(part) {
+					indexes[i] = 0
+				} else {
+					indexes[i] = index + 1
+					incIndex = true
+				}
+			}
+		}
+	}
+	return ret
+}
+
+func flattenNested(prefix string, nested config.GenericMap) config.GenericMap {
+	subFlow := config.GenericMap{}
+	for k, v := range nested {
+		subFlow[prefix+k] = v
+	}
+	return subFlow
+}
diff --git a/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/encode/metrics/preprocess.go b/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/encode/metrics/preprocess.go
new file mode 100644
index 00000000..5aabc831
--- /dev/null
+++ b/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/encode/metrics/preprocess.go
@@ -0,0 +1,91 @@
+package metrics
+
+import (
+	"regexp"
+	"strings"
+
+	"github.com/netobserv/flowlogs-pipeline/pkg/api"
+	"github.com/netobserv/flowlogs-pipeline/pkg/utils/filters"
+)
+
+type Preprocessed struct { // a metric definition plus the data Preprocess derives from it
+	*api.MetricsItem
+	filters         []preprocessedFilter // one entry per MetricsItem filter, built by Preprocess
+	MappedLabels    []MappedLabel        // labels not covered by a flattened field
+	FlattenedLabels []MappedLabel        // labels that are, or are nested under, a flattened field
+}
+
+type MappedLabel struct { // association between a source field and the label it feeds
+	Source string // label name as listed in the metric definition
+	Target string // name exposed on the metric: the Remap entry when set, else Source
+}
+
+type preprocessedFilter struct { // a metric filter turned into a ready-to-run predicate
+	predicate filters.Predicate // built by filterToPredicate from the filter definition
+	useFlat   bool              // true when the filter key is, or is nested under, a flattened field
+}
+
+// TargetLabels lists the target label names: flattened labels first, then the mapped ones.
+func (p *Preprocessed) TargetLabels() []string {
+	var names []string
+	for _, group := range [][]MappedLabel{p.FlattenedLabels, p.MappedLabels} {
+		for _, ml := range group {
+			names = append(names, ml.Target)
+		}
+	}
+	return names
+}
+
+func filterToPredicate(filter api.MetricsFilter) filters.Predicate {
+	switch filter.Type { // each filter type maps to the matching predicate constructor
+	case api.MetricFilterEqual:
+		return filters.Equal(filter.Key, filter.Value, true)
+	case api.MetricFilterNotEqual:
+		return filters.NotEqual(filter.Key, filter.Value, true)
+	case api.MetricFilterPresence:
+		return filters.Presence(filter.Key)
+	case api.MetricFilterAbsence:
+		return filters.Absence(filter.Key)
+	case api.MetricFilterRegex:
+		r, _ := regexp.Compile(filter.Value) // NOTE(review): compile error dropped, r is nil on a bad pattern; confirm patterns are validated upstream
+		return filters.Regex(filter.Key, r)
+	case api.MetricFilterNotRegex:
+		r, _ := regexp.Compile(filter.Value) // NOTE(review): same as above, r is nil on a bad pattern
+		return filters.NotRegex(filter.Key, r)
+	}
+	// Any other (or unset) filter type falls back to an equality match.
+	return filters.Equal(filter.Key, filter.Value, true)
+}
+
+// Preprocess wraps a metric definition and precomputes its label mappings and filter predicates.
+func Preprocess(def *api.MetricsItem) *Preprocessed {
+	pp := &Preprocessed{MetricsItem: def}
+	// Split the labels between flattened and regular ones, applying the optional renaming.
+	for _, name := range def.Labels {
+		target := name
+		if remapped := def.Remap[name]; remapped != "" {
+			target = remapped
+		}
+		ml := MappedLabel{Source: name, Target: target}
+		if pp.isFlattened(name) {
+			pp.FlattenedLabels = append(pp.FlattenedLabels, ml)
+			continue
+		}
+		pp.MappedLabels = append(pp.MappedLabels, ml)
+	}
+	// Build each filter's predicate and record whether its key targets a flattened field.
+	for _, f := range def.Filters {
+		pf := preprocessedFilter{predicate: filterToPredicate(f), useFlat: pp.isFlattened(f.Key)}
+		pp.filters = append(pp.filters, pf)
+	}
+	return pp
+}
+
+func (p *Preprocessed) isFlattened(fieldPath string) bool {
+	for _, root := range p.Flatten { // a path matches a flattened field itself or anything nested under it
+		if strings.HasPrefix(fieldPath, root+">") || fieldPath == root {
+			return true
+		}
+	}
+	return false
+}
diff --git a/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/encode/metrics_common.go b/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/encode/metrics_common.go
index e45f0925..d420f1af 100644
--- a/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/encode/metrics_common.go
+++ b/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/encode/metrics_common.go
@@ -24,6 +24,7 @@ import (
 	"github.com/netobserv/flowlogs-pipeline/pkg/api"
 	"github.com/netobserv/flowlogs-pipeline/pkg/config"
 	"github.com/netobserv/flowlogs-pipeline/pkg/operational"
+	"github.com/netobserv/flowlogs-pipeline/pkg/pipeline/encode/metrics"
 	putils "github.com/netobserv/flowlogs-pipeline/pkg/pipeline/utils"
 	"github.com/netobserv/flowlogs-pipeline/pkg/utils"
 	"github.com/prometheus/client_golang/prometheus"
@@ -32,7 +33,7 @@ import (
 
 type mInfoStruct struct {
 	genericMetric interface{} // can be a counter, gauge, or histogram pointer
-	info          *MetricInfo
+	info          *metrics.Preprocessed
 }
 
 type MetricsCommonStruct struct {
@@ -84,22 +85,22 @@ var (
 	)
 )
 
-func (m *MetricsCommonStruct) AddCounter(name string, g interface{}, info *MetricInfo) {
+func (m *MetricsCommonStruct) AddCounter(name string, g interface{}, info *metrics.Preprocessed) {
 	mStruct := mInfoStruct{genericMetric: g, info: info}
 	m.counters[name] = mStruct
 }
 
-func (m *MetricsCommonStruct) AddGauge(name string, g interface{}, info *MetricInfo) {
+func (m *MetricsCommonStruct) AddGauge(name string, g interface{}, info *metrics.Preprocessed) {
 	mStruct := mInfoStruct{genericMetric: g, info: info}
 	m.gauges[name] = mStruct
 }
 
-func (m *MetricsCommonStruct) AddHist(name string, g interface{}, info *MetricInfo) {
+func (m *MetricsCommonStruct) AddHist(name string, g interface{}, info *metrics.Preprocessed) {
 	mStruct := mInfoStruct{genericMetric: g, info: info}
 	m.histos[name] = mStruct
 }
 
-func (m *MetricsCommonStruct) AddAggHist(name string, g interface{}, info *MetricInfo) {
+func (m *MetricsCommonStruct) AddAggHist(name string, g interface{}, info *metrics.Preprocessed) {
 	mStruct := mInfoStruct{genericMetric: g, info: info}
 	m.aggHistos[name] = mStruct
 }
@@ -109,91 +110,116 @@ func (m *MetricsCommonStruct) MetricCommonEncode(mci MetricsCommonInterface, met
 
 	// Process counters
 	for _, mInfo := range m.counters {
-		labels, value, _ := m.prepareMetric(mci, metricRecord, mInfo.info, mInfo.genericMetric)
-		if labels == nil {
+		labelSets, value := m.prepareMetric(mci, metricRecord, mInfo.info, mInfo.genericMetric)
+		if labelSets == nil {
 			continue
 		}
-		err := mci.ProcessCounter(mInfo.genericMetric, labels, value)
-		if err != nil {
-			log.Errorf("labels registering error on %s: %v", mInfo.info.Name, err)
-			m.errorsCounter.WithLabelValues("LabelsRegisteringError", mInfo.info.Name, "").Inc()
-			continue
+		for _, labels := range labelSets {
+			err := mci.ProcessCounter(mInfo.genericMetric, labels.lMap, value)
+			if err != nil {
+				log.Errorf("labels registering error on %s: %v", mInfo.info.Name, err)
+				m.errorsCounter.WithLabelValues("LabelsRegisteringError", mInfo.info.Name, "").Inc()
+				continue
+			}
+			m.metricsProcessed.Inc()
 		}
-		m.metricsProcessed.Inc()
 	}
 
 	// Process gauges
 	for _, mInfo := range m.gauges {
-		labels, value, key := m.prepareMetric(mci, metricRecord, mInfo.info, mInfo.genericMetric)
-		if labels == nil {
+		labelSets, value := m.prepareMetric(mci, metricRecord, mInfo.info, mInfo.genericMetric)
+		if labelSets == nil {
 			continue
 		}
-		err := mci.ProcessGauge(mInfo.genericMetric, labels, value, key)
-		if err != nil {
-			log.Errorf("labels registering error on %s: %v", mInfo.info.Name, err)
-			m.errorsCounter.WithLabelValues("LabelsRegisteringError", mInfo.info.Name, "").Inc()
-			continue
+		for _, labels := range labelSets {
+			err := mci.ProcessGauge(mInfo.genericMetric, labels.lMap, value, labels.key)
+			if err != nil {
+				log.Errorf("labels registering error on %s: %v", mInfo.info.Name, err)
+				m.errorsCounter.WithLabelValues("LabelsRegisteringError", mInfo.info.Name, "").Inc()
+				continue
+			}
+			m.metricsProcessed.Inc()
 		}
-		m.metricsProcessed.Inc()
 	}
 
 	// Process histograms
 	for _, mInfo := range m.histos {
-		labels, value, _ := m.prepareMetric(mci, metricRecord, mInfo.info, mInfo.genericMetric)
-		if labels == nil {
+		labelSets, value := m.prepareMetric(mci, metricRecord, mInfo.info, mInfo.genericMetric)
+		if labelSets == nil {
 			continue
 		}
-		err := mci.ProcessHist(mInfo.genericMetric, labels, value)
-		if err != nil {
-			log.Errorf("labels registering error on %s: %v", mInfo.info.Name, err)
-			m.errorsCounter.WithLabelValues("LabelsRegisteringError", mInfo.info.Name, "").Inc()
-			continue
+		for _, labels := range labelSets {
+			err := mci.ProcessHist(mInfo.genericMetric, labels.lMap, value)
+			if err != nil {
+				log.Errorf("labels registering error on %s: %v", mInfo.info.Name, err)
+				m.errorsCounter.WithLabelValues("LabelsRegisteringError", mInfo.info.Name, "").Inc()
+				continue
+			}
+			m.metricsProcessed.Inc()
 		}
-		m.metricsProcessed.Inc()
 	}
 
 	// Process pre-aggregated histograms
 	for _, mInfo := range m.aggHistos {
-		labels, values := m.prepareAggHisto(mci, metricRecord, mInfo.info, mInfo.genericMetric)
-		if labels == nil {
+		labelSets, values := m.prepareAggHisto(mci, metricRecord, mInfo.info, mInfo.genericMetric)
+		if labelSets == nil {
 			continue
 		}
-		err := mci.ProcessAggHist(mInfo.genericMetric, labels, values)
-		if err != nil {
-			log.Errorf("labels registering error on %s: %v", mInfo.info.Name, err)
-			m.errorsCounter.WithLabelValues("LabelsRegisteringError", mInfo.info.Name, "").Inc()
-			continue
+		for _, labels := range labelSets {
+			err := mci.ProcessAggHist(mInfo.genericMetric, labels.lMap, values)
+			if err != nil {
+				log.Errorf("labels registering error on %s: %v", mInfo.info.Name, err)
+				m.errorsCounter.WithLabelValues("LabelsRegisteringError", mInfo.info.Name, "").Inc()
+				continue
+			}
+			m.metricsProcessed.Inc()
 		}
-		m.metricsProcessed.Inc()
 	}
 }
 
-func (m *MetricsCommonStruct) prepareMetric(mci MetricsCommonInterface, flow config.GenericMap, info *MetricInfo, mv interface{}) (map[string]string, float64, string) {
+func (m *MetricsCommonStruct) prepareMetric(mci MetricsCommonInterface, flow config.GenericMap, info *metrics.Preprocessed, mv interface{}) ([]labelsKeyAndMap, float64) {
+	flatParts := info.GenerateFlatParts(flow)
+	ok, flatParts := info.ApplyFilters(flow, flatParts)
+	if !ok {
+		return nil, 0
+	}
+
 	val := m.extractGenericValue(flow, info)
 	if val == nil {
-		return nil, 0, ""
+		return nil, 0
 	}
 	floatVal, err := utils.ConvertToFloat64(val)
 	if err != nil {
 		m.errorsCounter.WithLabelValues("ValueConversionError", info.Name, info.ValueKey).Inc()
-		return nil, 0, ""
+		return nil, 0
 	}
 	if info.ValueScale != 0 {
-		floatVal = floatVal / info.ValueScale
+		floatVal /= info.ValueScale
 	}
 
-	entryLabels, key := extractLabelsAndKey(flow, info)
-	// Update entry for expiry mechanism (the entry itself is its own cleanup function)
-	cacheEntry := mci.GetChacheEntry(entryLabels, mv)
-	ok := m.mCache.UpdateCacheEntry(key, cacheEntry)
-	if !ok {
-		m.metricsDropped.Inc()
-		return nil, 0, ""
+	labelSets := extractLabels(flow, flatParts, info)
+	var lkms []labelsKeyAndMap
+	for _, ls := range labelSets {
+		// Update entry for expiry mechanism (the entry itself is its own cleanup function)
+		lkm := ls.toKeyAndMap(info)
+		lkms = append(lkms, lkm)
+		cacheEntry := mci.GetChacheEntry(lkm.lMap, mv)
+		ok := m.mCache.UpdateCacheEntry(lkm.key, cacheEntry)
+		if !ok {
+			m.metricsDropped.Inc()
+			return nil, 0
+		}
 	}
-	return entryLabels, floatVal, key
+	return lkms, floatVal
 }
 
-func (m *MetricsCommonStruct) prepareAggHisto(mci MetricsCommonInterface, flow config.GenericMap, info *MetricInfo, mc interface{}) (map[string]string, []float64) {
+func (m *MetricsCommonStruct) prepareAggHisto(mci MetricsCommonInterface, flow config.GenericMap, info *metrics.Preprocessed, mc interface{}) ([]labelsKeyAndMap, []float64) {
+	flatParts := info.GenerateFlatParts(flow)
+	ok, flatParts := info.ApplyFilters(flow, flatParts)
+	if !ok {
+		return nil, nil
+	}
+
 	val := m.extractGenericValue(flow, info)
 	if val == nil {
 		return nil, nil
@@ -204,23 +230,23 @@ func (m *MetricsCommonStruct) prepareAggHisto(mci MetricsCommonInterface, flow c
 		return nil, nil
 	}
 
-	entryLabels, key := extractLabelsAndKey(flow, info)
-	// Update entry for expiry mechanism (the entry itself is its own cleanup function)
-	cacheEntry := mci.GetChacheEntry(entryLabels, mc)
-	ok = m.mCache.UpdateCacheEntry(key, cacheEntry)
-	if !ok {
-		m.metricsDropped.Inc()
-		return nil, nil
+	labelSets := extractLabels(flow, flatParts, info)
+	var lkms []labelsKeyAndMap
+	for _, ls := range labelSets {
+		// Update entry for expiry mechanism (the entry itself is its own cleanup function)
+		lkm := ls.toKeyAndMap(info)
+		lkms = append(lkms, lkm)
+		cacheEntry := mci.GetChacheEntry(lkm.lMap, mc)
+		ok := m.mCache.UpdateCacheEntry(lkm.key, cacheEntry)
+		if !ok {
+			m.metricsDropped.Inc()
+			return nil, nil
+		}
 	}
-	return entryLabels, values
+	return lkms, values
 }
 
-func (m *MetricsCommonStruct) extractGenericValue(flow config.GenericMap, info *MetricInfo) interface{} {
-	for _, pred := range info.FilterPredicates {
-		if !pred(flow) {
-			return nil
-		}
-	}
+func (m *MetricsCommonStruct) extractGenericValue(flow config.GenericMap, info *metrics.Preprocessed) interface{} {
 	if info.ValueKey == "" {
 		// No value key means it's a records / flows counter (1 flow = 1 increment), so just return 1
 		return 1
@@ -233,21 +259,58 @@ func (m *MetricsCommonStruct) extractGenericValue(flow config.GenericMap, info *
 	return val
 }
 
-func extractLabelsAndKey(flow config.GenericMap, info *MetricInfo) (map[string]string, string) {
-	entryLabels := make(map[string]string, len(info.MappedLabels))
+type label struct { // a single label name/value pair
+	key   string // label name (the mapped label's Target)
+	value string // label value as a string; empty when the source field is absent
+}
+
+type labelSet []label // the labels of one metric entry, in the order they were extracted
+
+type labelsKeyAndMap struct { // a label set in the two forms produced by toKeyAndMap
+	key  string            // metric name then each label value, '|'-terminated; used as the cache entry key
+	lMap map[string]string // label name -> label value
+}
+
+func (l labelSet) toKeyAndMap(info *metrics.Preprocessed) labelsKeyAndMap {
 	key := strings.Builder{}
 	key.WriteString(info.Name)
 	key.WriteRune('|')
-	for _, t := range info.MappedLabels {
-		value := ""
-		if v, ok := flow[t.Source]; ok {
-			value = utils.ConvertToString(v)
-		}
-		entryLabels[t.Target] = value
-		key.WriteString(value)
+	m := map[string]string{}
+	for _, kv := range l {
+		key.WriteString(kv.value)
 		key.WriteRune('|')
+		m[kv.key] = kv.value
+	}
+	return labelsKeyAndMap{key: key.String(), lMap: m}
+}
+
+// extractLabels builds the label sets (label names and values) of a flow for a single metric definition.
+// Without flat parts, the result is a single set holding the mapped labels read from the flow itself.
+// Otherwise there is one set per flat part (e.g. when a flow field such as "interfaces" is a list): the part's flattened labels followed by the mapped ones.
+func extractLabels(flow config.GenericMap, flatParts []config.GenericMap, info *metrics.Preprocessed) []labelSet {
+	shared := newLabelSet(flow, info.MappedLabels)
+	if len(flatParts) == 0 {
+		return []labelSet{shared}
+	}
+	sets := make([]labelSet, 0, len(flatParts))
+	for _, part := range flatParts {
+		flat := newLabelSet(part, info.FlattenedLabels)
+		flat = append(flat, shared...)
+		sets = append(sets, flat)
+	}
+	return sets
+}
+
+func newLabelSet(part config.GenericMap, labels []metrics.MappedLabel) labelSet {
+	var ls labelSet
+	for _, t := range labels {
+		label := label{key: t.Target, value: ""}
+		if v, ok := part[t.Source]; ok {
+			label.value = utils.ConvertToString(v)
+		}
+		ls = append(ls, label)
 	}
-	return entryLabels, key.String()
+	return ls
 }
 
 func (m *MetricsCommonStruct) cleanupExpiredEntriesLoop(callback putils.CacheCallback) {
diff --git a/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/encode/opentelemetry/encode_otlpmetrics.go b/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/encode/opentelemetry/encode_otlpmetrics.go
index 12edd1d9..92200cec 100644
--- a/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/encode/opentelemetry/encode_otlpmetrics.go
+++ b/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/encode/opentelemetry/encode_otlpmetrics.go
@@ -25,6 +25,7 @@ import (
 	"github.com/netobserv/flowlogs-pipeline/pkg/config"
 	"github.com/netobserv/flowlogs-pipeline/pkg/operational"
 	"github.com/netobserv/flowlogs-pipeline/pkg/pipeline/encode"
+	"github.com/netobserv/flowlogs-pipeline/pkg/pipeline/encode/metrics"
 	log "github.com/sirupsen/logrus"
 	"go.opentelemetry.io/otel"
 	"go.opentelemetry.io/otel/attribute"
@@ -135,7 +136,7 @@ func NewEncodeOtlpMetrics(opMetrics *operational.Metrics, params config.StagePar
 		fullMetricName := cfg.Prefix + mCfg.Name
 		log.Debugf("fullMetricName = %v", fullMetricName)
 		log.Debugf("Labels = %v", mCfg.Labels)
-		mInfo := encode.CreateMetricInfo(mCfg)
+		mInfo := metrics.Preprocess(mCfg)
 		switch mCfg.Type {
 		case api.MetricCounter:
 			counter, err := meter.Float64Counter(fullMetricName)
diff --git a/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/encode/opentelemetry/opentelemetry.go b/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/encode/opentelemetry/opentelemetry.go
index 246a9d76..2aebf146 100644
--- a/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/encode/opentelemetry/opentelemetry.go
+++ b/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/encode/opentelemetry/opentelemetry.go
@@ -253,7 +253,7 @@ func (e *EncodeOtlpLogs) LogWrite(entry config.GenericMap) {
 	msg := string(msgByteArray)
 	// TODO: Decide whether the content should be delivered as Body or as Attributes
 	lrc := logs.LogRecordConfig{
-		//Timestamp:         &now, // take timestamp from entry, if present?
+		// Timestamp:         &now, // take timestamp from entry, if present?
 		ObservedTimestamp: now,
 		SeverityNumber:    &sn,
 		SeverityText:      &st,
diff --git a/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/extract/aggregate/aggregate.go b/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/extract/aggregate/aggregate.go
index 7a7c2f81..22aa4292 100644
--- a/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/extract/aggregate/aggregate.go
+++ b/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/extract/aggregate/aggregate.go
@@ -71,7 +71,7 @@ func (aggregate *Aggregate) LabelsFromEntry(entry config.GenericMap) (Labels, bo
 		if !ok {
 			allLabelsFound = false
 		}
-		labels[key] = fmt.Sprint(value)
+		labels[key] = util.ConvertToString(value)
 	}
 
 	return labels, allLabelsFound
@@ -149,31 +149,29 @@ func (aggregate *Aggregate) UpdateByEntry(entry config.GenericMap, normalizedVal
 	if operation == OperationCount {
 		groupState.totalValue = float64(groupState.totalCount + 1)
 		groupState.recentOpValue = float64(groupState.recentCount + 1)
-	} else {
-		if operationKey != "" {
-			value, ok := entry[operationKey]
-			if ok {
-				valueString := util.ConvertToString(value)
-				if valueFloat64, err := strconv.ParseFloat(valueString, 64); err != nil {
-					// Log as debug to avoid performance impact
-					log.Debugf("UpdateByEntry error when parsing float '%s': %v", valueString, err)
-				} else {
-					switch operation {
-					case OperationSum:
-						groupState.totalValue += valueFloat64
-						groupState.recentOpValue += valueFloat64
-					case OperationMax:
-						groupState.totalValue = math.Max(groupState.totalValue, valueFloat64)
-						groupState.recentOpValue = math.Max(groupState.recentOpValue, valueFloat64)
-					case OperationMin:
-						groupState.totalValue = math.Min(groupState.totalValue, valueFloat64)
-						groupState.recentOpValue = math.Min(groupState.recentOpValue, valueFloat64)
-					case OperationAvg:
-						groupState.totalValue = (groupState.totalValue*float64(groupState.totalCount) + valueFloat64) / float64(groupState.totalCount+1)
-						groupState.recentOpValue = (groupState.recentOpValue*float64(groupState.recentCount) + valueFloat64) / float64(groupState.recentCount+1)
-					case OperationRawValues:
-						groupState.recentRawValues = append(groupState.recentRawValues, valueFloat64)
-					}
+	} else if operationKey != "" {
+		value, ok := entry[operationKey]
+		if ok {
+			valueString := util.ConvertToString(value)
+			if valueFloat64, err := strconv.ParseFloat(valueString, 64); err != nil {
+				// Log as debug to avoid performance impact
+				log.Debugf("UpdateByEntry error when parsing float '%s': %v", valueString, err)
+			} else {
+				switch operation {
+				case OperationSum:
+					groupState.totalValue += valueFloat64
+					groupState.recentOpValue += valueFloat64
+				case OperationMax:
+					groupState.totalValue = math.Max(groupState.totalValue, valueFloat64)
+					groupState.recentOpValue = math.Max(groupState.recentOpValue, valueFloat64)
+				case OperationMin:
+					groupState.totalValue = math.Min(groupState.totalValue, valueFloat64)
+					groupState.recentOpValue = math.Min(groupState.recentOpValue, valueFloat64)
+				case OperationAvg:
+					groupState.totalValue = (groupState.totalValue*float64(groupState.totalCount) + valueFloat64) / float64(groupState.totalCount+1)
+					groupState.recentOpValue = (groupState.recentOpValue*float64(groupState.recentCount) + valueFloat64) / float64(groupState.recentCount+1)
+				case OperationRawValues:
+					groupState.recentRawValues = append(groupState.recentRawValues, valueFloat64)
 				}
 			}
 		}
diff --git a/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/extract/aggregate/aggregates.go b/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/extract/aggregate/aggregates.go
index de092d5e..848e9bf2 100644
--- a/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/extract/aggregate/aggregates.go
+++ b/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/extract/aggregate/aggregates.go
@@ -70,8 +70,7 @@ func (aggregates *Aggregates) addAggregate(aggregateDefinition *api.AggregateDef
 		expiryTime: expiryTime.Duration,
 	}
 
-	appendedAggregates := append(aggregates.Aggregates, aggregate)
-	return appendedAggregates
+	return append(aggregates.Aggregates, aggregate)
 }
 
 func (aggregates *Aggregates) cleanupExpiredEntriesLoop() {
diff --git a/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/extract/timebased/filters.go b/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/extract/timebased/filters.go
index cd47c218..e753c3b3 100644
--- a/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/extract/timebased/filters.go
+++ b/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/extract/timebased/filters.go
@@ -102,7 +102,7 @@ func (fs *FilterStruct) CalculateValue(l *list.List, oldestValidTime time.Time)
 		}
 	}
 	if fs.Rule.OperationType == api.FilterOperationAvg && nItems > 0 {
-		currentValue = currentValue / float64(nItems)
+		currentValue /= float64(nItems)
 	}
 	if fs.Rule.OperationType == api.FilterOperationCnt {
 		currentValue = float64(nItems)
diff --git a/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/extract/timebased/timebased.go b/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/extract/timebased/timebased.go
index 0a003f57..d6fded1d 100644
--- a/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/extract/timebased/timebased.go
+++ b/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/extract/timebased/timebased.go
@@ -80,10 +80,8 @@ func CreateIndexKeysAndFilters(rules []api.TimebasedFilterRule) (map[string]*Ind
 			}
 			tmpIndexKeyStructs[filterRule.IndexKey] = rStruct
 			log.Debugf("new IndexKeyTable: name = %s = %v", filterRule.IndexKey, *rStruct)
-		} else {
-			if filterRule.TimeInterval.Duration > rStruct.maxTimeInterval {
-				rStruct.maxTimeInterval = filterRule.TimeInterval.Duration
-			}
+		} else if filterRule.TimeInterval.Duration > rStruct.maxTimeInterval {
+			rStruct.maxTimeInterval = filterRule.TimeInterval.Duration
 		}
 		// verify the validity of the OperationType field in the filterRule
 		switch filterRule.OperationType {
diff --git a/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/ingest/ingest_grpc.go b/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/ingest/ingest_grpc.go
index db9750b0..16afb8d7 100644
--- a/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/ingest/ingest_grpc.go
+++ b/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/ingest/ingest_grpc.go
@@ -7,7 +7,8 @@ import (
 	"github.com/netobserv/flowlogs-pipeline/pkg/api"
 	"github.com/netobserv/flowlogs-pipeline/pkg/config"
 	"github.com/netobserv/flowlogs-pipeline/pkg/operational"
-	"github.com/netobserv/flowlogs-pipeline/pkg/pipeline/utils"
+	pUtils "github.com/netobserv/flowlogs-pipeline/pkg/pipeline/utils"
+	"github.com/netobserv/flowlogs-pipeline/pkg/utils"
 	"github.com/netobserv/netobserv-ebpf-agent/pkg/decode"
 	grpc "github.com/netobserv/netobserv-ebpf-agent/pkg/grpc/flow"
 	"github.com/netobserv/netobserv-ebpf-agent/pkg/pbflow"
@@ -60,7 +61,7 @@ func NewGRPCProtobuf(opMetrics *operational.Metrics, params config.StageParam) (
 func (no *GRPCProtobuf) Ingest(out chan<- config.GenericMap) {
 	no.metrics.createOutQueueLen(out)
 	go func() {
-		<-utils.ExitChannel()
+		<-pUtils.ExitChannel()
 		close(no.flowPackets)
 		no.collector.Close()
 	}()
@@ -108,7 +109,7 @@ func instrumentGRPC(m *metrics) grpc2.UnaryServerInterceptor {
 		if err != nil {
 			// "trace" level used to minimize performance impact
 			glog.Tracef("Reporting metric error: %v", err)
-			m.error(fmt.Sprint(status.Code(err)))
+			m.error(utils.ConvertToString(status.Code(err)))
 		}
 
 		// Stage duration
diff --git a/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/transform/kubernetes/enrich.go b/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/transform/kubernetes/enrich.go
index 899e2f2c..b6a79df7 100644
--- a/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/transform/kubernetes/enrich.go
+++ b/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/transform/kubernetes/enrich.go
@@ -96,7 +96,7 @@ const nodeZoneLabelName = "topology.kubernetes.io/zone"
 
 func fillInK8sZone(outputEntry config.GenericMap, rule *api.K8sRule, kubeInfo *inf.Info, zonePrefix string) {
 	if !rule.AddZone {
-		//Nothing to do
+		// Nothing to do
 		return
 	}
 	switch kubeInfo.Type {
@@ -121,7 +121,7 @@ func fillInK8sZone(outputEntry config.GenericMap, rule *api.K8sRule, kubeInfo *i
 		return
 
 	case inf.TypeService:
-		//A service is not assigned to a dedicated zone, skipping
+		// A service is not assigned to a dedicated zone, skipping
 		return
 	}
 }
diff --git a/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/transform/transform_filter.go b/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/transform/transform_filter.go
index 527a9245..0d8dd718 100644
--- a/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/transform/transform_filter.go
+++ b/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/transform/transform_filter.go
@@ -28,6 +28,7 @@ import (
 	"github.com/netobserv/flowlogs-pipeline/pkg/api"
 	"github.com/netobserv/flowlogs-pipeline/pkg/config"
 	"github.com/netobserv/flowlogs-pipeline/pkg/utils"
+	"github.com/netobserv/flowlogs-pipeline/pkg/utils/filters"
 	"github.com/sirupsen/logrus"
 )
 
@@ -37,7 +38,13 @@ var (
 )
 
 type Filter struct {
-	Rules []api.TransformFilterRule
+	Rules     []api.TransformFilterRule
+	KeepRules []predicatesRule
+}
+
+type predicatesRule struct { // a keep rule in evaluable form
+	predicates []filters.Predicate // all of them must hold for the rule to match
+	sampling   uint16              // handed to rollSampling; 0 means the roll always passes
+}
 
 // Transform transforms a flow; if false is returned as a second argument, the entry is dropped
@@ -45,6 +52,18 @@ func (f *Filter) Transform(entry config.GenericMap) (config.GenericMap, bool) {
 	tlog.Tracef("f = %v", f)
 	outputEntry := entry.Copy()
 	labels := make(map[string]string)
+	if len(f.KeepRules) > 0 {
+		keep := false
+		for _, r := range f.KeepRules {
+			if applyPredicates(outputEntry, r) {
+				keep = true
+				break
+			}
+		}
+		if !keep {
+			return nil, false
+		}
+	}
 	for i := range f.Rules {
 		tlog.Tracef("rule = %v", f.Rules[i])
 		if cont := applyRule(outputEntry, labels, &f.Rules[i]); !cont {
@@ -143,6 +162,9 @@ func applyRule(entry config.GenericMap, labels map[string]string, rule *api.Tran
 		return !isRemoveEntrySatisfied(entry, rule.RemoveEntryAllSatisfied)
 	case api.ConditionalSampling:
 		return sample(entry, rule.ConditionalSampling)
+	case api.KeepEntryAllSatisfied:
+		// This should be processed only in "applyPredicates". Failure to do so is a bug.
+		tlog.Panicf("unexpected KeepEntryAllSatisfied: %v", rule)
 	default:
 		tlog.Panicf("unknown type %s for transform.Filter rule: %v", rule.Type, rule)
 	}
@@ -159,25 +181,58 @@ func isRemoveEntrySatisfied(entry config.GenericMap, rules []*api.RemoveEntryRul
 	return true
 }
 
+// applyPredicates reports whether the entry passes the rule's sampling roll and satisfies all its predicates.
+func applyPredicates(entry config.GenericMap, rule predicatesRule) bool {
+	// The sampling roll comes first, so rollSampling is invoked on every call,
+	// even when a predicate would reject the entry.
+	keep := rollSampling(rule.sampling)
+	// Evaluate predicates in order and stop at the first one that fails.
+	for i := 0; keep && i < len(rule.predicates); i++ {
+		keep = rule.predicates[i](entry)
+	}
+	return keep
+}
+
 func sample(entry config.GenericMap, rules []*api.SamplingCondition) bool {
 	for _, r := range rules {
 		if isRemoveEntrySatisfied(entry, r.Rules) {
-			return r.Value == 0 || (rndgen.Intn(int(r.Value)) == 0)
+			return rollSampling(r.Value)
 		}
 	}
 	return true
 }
 
+func rollSampling(value uint16) bool {
+	return value == 0 || rndgen.Intn(int(value)) == 0 // 0 always passes; otherwise passes only when the draw is 0
+}
+
 // NewTransformFilter create a new filter transform
 func NewTransformFilter(params config.StageParam) (Transformer, error) {
 	tlog.Debugf("entering NewTransformFilter")
+	keepRules := []predicatesRule{}
 	rules := []api.TransformFilterRule{}
 	if params.Transform != nil && params.Transform.Filter != nil {
 		params.Transform.Filter.Preprocess()
-		rules = params.Transform.Filter.Rules
+		for i := range params.Transform.Filter.Rules {
+			baseRules := &params.Transform.Filter.Rules[i]
+			if baseRules.Type == api.KeepEntryAllSatisfied {
+				pr := predicatesRule{sampling: baseRules.KeepEntrySampling}
+				for _, keepRule := range baseRules.KeepEntryAllSatisfied {
+					pred, err := filters.FromKeepEntry(keepRule)
+					if err != nil {
+						return nil, err
+					}
+					pr.predicates = append(pr.predicates, pred)
+				}
+				keepRules = append(keepRules, pr)
+			} else {
+				rules = append(rules, *baseRules)
+			}
+		}
 	}
 	transformFilter := &Filter{
-		Rules: rules,
+		Rules:     rules,
+		KeepRules: keepRules,
 	}
 	return transformFilter, nil
 }
diff --git a/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/transform/transform_generic.go b/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/transform/transform_generic.go
index c1f2e4dc..f30ca840 100644
--- a/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/transform/transform_generic.go
+++ b/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/transform/transform_generic.go
@@ -53,31 +53,31 @@ func (g *Generic) Transform(entry config.GenericMap) (config.GenericMap, bool) {
 
 func (g *Generic) performMultiplier(entry config.GenericMap, transformRule api.GenericTransformRule, outputEntry config.GenericMap) bool {
 	ok := true
-	switch entry[transformRule.Input].(type) {
+	switch val := entry[transformRule.Input].(type) {
 	case int:
-		outputEntry[transformRule.Output] = transformRule.Multiplier * entry[transformRule.Input].(int)
+		outputEntry[transformRule.Output] = transformRule.Multiplier * val
 	case uint:
-		outputEntry[transformRule.Output] = uint(transformRule.Multiplier) * entry[transformRule.Input].(uint)
+		outputEntry[transformRule.Output] = uint(transformRule.Multiplier) * val
 	case int8:
-		outputEntry[transformRule.Output] = int8(transformRule.Multiplier) * entry[transformRule.Input].(int8)
+		outputEntry[transformRule.Output] = int8(transformRule.Multiplier) * val
 	case uint8:
-		outputEntry[transformRule.Output] = uint8(transformRule.Multiplier) * entry[transformRule.Input].(uint8)
+		outputEntry[transformRule.Output] = uint8(transformRule.Multiplier) * val
 	case int16:
-		outputEntry[transformRule.Output] = int16(transformRule.Multiplier) * entry[transformRule.Input].(int16)
+		outputEntry[transformRule.Output] = int16(transformRule.Multiplier) * val
 	case uint16:
-		outputEntry[transformRule.Output] = uint16(transformRule.Multiplier) * entry[transformRule.Input].(uint16)
+		outputEntry[transformRule.Output] = uint16(transformRule.Multiplier) * val
 	case int32:
-		outputEntry[transformRule.Output] = int32(transformRule.Multiplier) * entry[transformRule.Input].(int32)
+		outputEntry[transformRule.Output] = int32(transformRule.Multiplier) * val
 	case uint32:
-		outputEntry[transformRule.Output] = uint32(transformRule.Multiplier) * entry[transformRule.Input].(uint32)
+		outputEntry[transformRule.Output] = uint32(transformRule.Multiplier) * val
 	case int64:
-		outputEntry[transformRule.Output] = int64(transformRule.Multiplier) * entry[transformRule.Input].(int64)
+		outputEntry[transformRule.Output] = int64(transformRule.Multiplier) * val
 	case uint64:
-		outputEntry[transformRule.Output] = uint64(transformRule.Multiplier) * entry[transformRule.Input].(uint64)
+		outputEntry[transformRule.Output] = uint64(transformRule.Multiplier) * val
 	case float32:
-		outputEntry[transformRule.Output] = float32(transformRule.Multiplier) * entry[transformRule.Input].(float32)
+		outputEntry[transformRule.Output] = float32(transformRule.Multiplier) * val
 	case float64:
-		outputEntry[transformRule.Output] = float64(transformRule.Multiplier) * entry[transformRule.Input].(float64)
+		outputEntry[transformRule.Output] = float64(transformRule.Multiplier) * val
 	default:
 		ok = false
 		glog.Errorf("%s not of numerical type; cannot perform multiplication", transformRule.Output)
diff --git a/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/transform/transform_network.go b/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/transform/transform_network.go
index b79e6759..09934ae7 100644
--- a/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/transform/transform_network.go
+++ b/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/transform/transform_network.go
@@ -141,6 +141,13 @@ func (n *Network) Transform(inputEntry config.GenericMap) (config.GenericMap, bo
 					}
 				}
 			}
+		case api.NetworkDecodeTCPFlags:
+			if anyFlags, ok := outputEntry[rule.DecodeTCPFlags.Input]; ok && anyFlags != nil {
+				if flags, ok := anyFlags.(uint16); ok {
+					flags := util.DecodeTCPFlags(flags)
+					outputEntry[rule.DecodeTCPFlags.Output] = flags
+				}
+			}
 
 		default:
 			log.Panicf("unknown type %s for transform.Network rule: %v", rule.Type, rule)
@@ -194,7 +201,8 @@ func NewTransformNetwork(params config.StageParam, opMetrics *operational.Metric
 			if len(jsonNetworkTransform.SubnetLabels) == 0 {
 				return nil, fmt.Errorf("a rule '%s' was found, but there are no subnet labels configured", api.NetworkAddSubnetLabel)
 			}
-		case api.NetworkAddSubnet:
+		case api.NetworkAddSubnet, api.NetworkDecodeTCPFlags:
+			// nothing
 		}
 	}
 
diff --git a/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/write/write_ipfix.go b/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/write/write_ipfix.go
index fd75575a..49f8d488 100644
--- a/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/write/write_ipfix.go
+++ b/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/write/write_ipfix.go
@@ -309,53 +309,53 @@ func addKubeContextToTemplate(elements *[]entities.InfoElementWithValue, registr
 	return nil
 }
 
-func loadCustomRegistry(EnterpriseID uint32) error {
-	err := registry.InitNewRegistry(EnterpriseID)
+func loadCustomRegistry(enterpriseID uint32) error {
+	err := registry.InitNewRegistry(enterpriseID)
 	if err != nil {
 		ilog.WithError(err).Errorf("Failed to initialize registry")
 		return err
 	}
-	err = registry.PutInfoElement((*entities.NewInfoElement("sourcePodNamespace", 7733, entities.String, EnterpriseID, 65535)), EnterpriseID)
+	err = registry.PutInfoElement((*entities.NewInfoElement("sourcePodNamespace", 7733, entities.String, enterpriseID, 65535)), enterpriseID)
 	if err != nil {
 		ilog.WithError(err).Errorf("Failed to register element")
 		return err
 	}
-	err = registry.PutInfoElement((*entities.NewInfoElement("sourcePodName", 7734, entities.String, EnterpriseID, 65535)), EnterpriseID)
+	err = registry.PutInfoElement((*entities.NewInfoElement("sourcePodName", 7734, entities.String, enterpriseID, 65535)), enterpriseID)
 	if err != nil {
 		ilog.WithError(err).Errorf("Failed to register element")
 		return err
 	}
-	err = registry.PutInfoElement((*entities.NewInfoElement("destinationPodNamespace", 7735, entities.String, EnterpriseID, 65535)), EnterpriseID)
+	err = registry.PutInfoElement((*entities.NewInfoElement("destinationPodNamespace", 7735, entities.String, enterpriseID, 65535)), enterpriseID)
 	if err != nil {
 		ilog.WithError(err).Errorf("Failed to register element")
 		return err
 	}
-	err = registry.PutInfoElement((*entities.NewInfoElement("destinationPodName", 7736, entities.String, EnterpriseID, 65535)), EnterpriseID)
+	err = registry.PutInfoElement((*entities.NewInfoElement("destinationPodName", 7736, entities.String, enterpriseID, 65535)), enterpriseID)
 	if err != nil {
 		ilog.WithError(err).Errorf("Failed to register element")
 		return err
 	}
-	err = registry.PutInfoElement((*entities.NewInfoElement("sourceNodeName", 7737, entities.String, EnterpriseID, 65535)), EnterpriseID)
+	err = registry.PutInfoElement((*entities.NewInfoElement("sourceNodeName", 7737, entities.String, enterpriseID, 65535)), enterpriseID)
 	if err != nil {
 		ilog.WithError(err).Errorf("Failed to register element")
 		return err
 	}
-	err = registry.PutInfoElement((*entities.NewInfoElement("destinationNodeName", 7738, entities.String, EnterpriseID, 65535)), EnterpriseID)
+	err = registry.PutInfoElement((*entities.NewInfoElement("destinationNodeName", 7738, entities.String, enterpriseID, 65535)), enterpriseID)
 	if err != nil {
 		ilog.WithError(err).Errorf("Failed to register element")
 		return err
 	}
-	err = registry.PutInfoElement((*entities.NewInfoElement("timeFlowRttNs", 7740, entities.Unsigned64, EnterpriseID, 8)), EnterpriseID)
+	err = registry.PutInfoElement((*entities.NewInfoElement("timeFlowRttNs", 7740, entities.Unsigned64, enterpriseID, 8)), enterpriseID)
 	if err != nil {
 		ilog.WithError(err).Errorf("Failed to register element")
 		return err
 	}
-	err = registry.PutInfoElement((*entities.NewInfoElement("interfaces", 7741, entities.String, EnterpriseID, 65535)), EnterpriseID)
+	err = registry.PutInfoElement((*entities.NewInfoElement("interfaces", 7741, entities.String, enterpriseID, 65535)), enterpriseID)
 	if err != nil {
 		ilog.WithError(err).Errorf("Failed to register element")
 		return err
 	}
-	err = registry.PutInfoElement((*entities.NewInfoElement("directions", 7742, entities.String, EnterpriseID, 65535)), EnterpriseID)
+	err = registry.PutInfoElement((*entities.NewInfoElement("directions", 7742, entities.String, enterpriseID, 65535)), enterpriseID)
 	if err != nil {
 		ilog.WithError(err).Errorf("Failed to register element")
 		return err
diff --git a/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/write/write_loki.go b/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/write/write_loki.go
index 8b46db9b..ceea8450 100644
--- a/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/write/write_loki.go
+++ b/vendor/github.com/netobserv/flowlogs-pipeline/pkg/pipeline/write/write_loki.go
@@ -27,6 +27,7 @@ import (
 	"github.com/netobserv/flowlogs-pipeline/pkg/config"
 	"github.com/netobserv/flowlogs-pipeline/pkg/operational"
 	pUtils "github.com/netobserv/flowlogs-pipeline/pkg/pipeline/utils"
+	"github.com/netobserv/flowlogs-pipeline/pkg/utils"
 
 	logAdapter "github.com/go-kit/kit/log/logrus"
 	jsonIter "github.com/json-iterator/go"
@@ -117,7 +118,8 @@ func (l *Loki) ProcessRecord(in config.GenericMap) error {
 	l.addLabels(in, labels)
 
 	// Remove labels and configured ignore list from record
-	ignoreList := append(l.apiConfig.IgnoreList, l.apiConfig.Labels...)
+	ignoreList := l.apiConfig.IgnoreList
+	ignoreList = append(ignoreList, l.apiConfig.Labels...)
 	for _, label := range ignoreList {
 		delete(out, label)
 	}
@@ -172,7 +174,7 @@ func (l *Loki) addLabels(record config.GenericMap, labels model.LabelSet) {
 		if !ok {
 			continue
 		}
-		lv := model.LabelValue(fmt.Sprint(val))
+		lv := model.LabelValue(utils.ConvertToString(val))
 		if !lv.IsValid() {
 			log.WithFields(logrus.Fields{"key": label, "value": val}).
 				Debug("Invalid label value. Ignoring it")
diff --git a/vendor/github.com/netobserv/flowlogs-pipeline/pkg/utils/convert.go b/vendor/github.com/netobserv/flowlogs-pipeline/pkg/utils/convert.go
index 1c4f0db3..40128bb5 100644
--- a/vendor/github.com/netobserv/flowlogs-pipeline/pkg/utils/convert.go
+++ b/vendor/github.com/netobserv/flowlogs-pipeline/pkg/utils/convert.go
@@ -246,9 +246,9 @@ func ConvertToBool(unk interface{}) (bool, error) {
 func ConvertToString(unk interface{}) string {
 	switch i := unk.(type) {
 	case float64:
-		return strconv.FormatFloat(i, 'E', -1, 64)
+		return strconv.FormatFloat(i, 'f', -1, 64)
 	case float32:
-		return strconv.FormatFloat(float64(i), 'E', -1, 32)
+		return strconv.FormatFloat(float64(i), 'f', -1, 32)
 	case int64:
 		return strconv.FormatInt(i, 10)
 	case int32:
diff --git a/vendor/github.com/netobserv/flowlogs-pipeline/pkg/utils/filters/filters.go b/vendor/github.com/netobserv/flowlogs-pipeline/pkg/utils/filters/filters.go
new file mode 100644
index 00000000..8bb12fcb
--- /dev/null
+++ b/vendor/github.com/netobserv/flowlogs-pipeline/pkg/utils/filters/filters.go
@@ -0,0 +1,183 @@
+// Package filters builds flow predicates from filter rules.
+package filters
+
+import (
+	"fmt"
+	"regexp"
+	"strings"
+
+	"github.com/netobserv/flowlogs-pipeline/pkg/api"
+	"github.com/netobserv/flowlogs-pipeline/pkg/config"
+	"github.com/netobserv/flowlogs-pipeline/pkg/utils"
+)
+
+// Predicate reports whether a flow satisfies a filter condition.
+type Predicate func(config.GenericMap) bool
+
+// variableExtractor matches "$(name)" placeholders and captures name.
+var variableExtractor = regexp.MustCompile(`\$\(([^\)]+)\)`)
+
+// Presence returns a Predicate that is true when the flow contains key.
+func Presence(key string) Predicate {
+	return func(flow config.GenericMap) bool {
+		_, found := flow[key]
+		return found
+	}
+}
+
+// Absence returns a Predicate that is true when the flow does not contain key.
+func Absence(key string) Predicate {
+	return func(flow config.GenericMap) bool {
+		_, found := flow[key]
+		return !found
+	}
+}
+
+// Equal returns a Predicate that is true when the flow contains key and its
+// value matches filterValue; the Predicate is false when key is missing.
+//
+// If filterValue is a string holding "$(name)" placeholders, each placeholder
+// is replaced with the string form of the flow field "name" (empty when that
+// field is missing) and the result is compared with the string form of the
+// value. Otherwise, with convertString set, the string form of the value is
+// compared with filterValue, so only a string filterValue can match; without
+// it, the raw value is compared with filterValue.
+func Equal(key string, filterValue any, convertString bool) Predicate {
+	varLookups := extractVarLookups(filterValue)
+	if len(varLookups) > 0 {
+		return func(flow config.GenericMap) bool {
+			if val, found := flow[key]; found {
+				// Variable injection => convert to string
+				sVal, ok := val.(string)
+				if !ok {
+					sVal = utils.ConvertToString(val)
+				}
+				injected := injectVars(flow, filterValue.(string), varLookups)
+				return sVal == injected
+			}
+			return false
+		}
+	}
+	if convertString {
+		return func(flow config.GenericMap) bool {
+			if val, found := flow[key]; found {
+				sVal, ok := val.(string)
+				if !ok {
+					sVal = utils.ConvertToString(val)
+				}
+				return sVal == filterValue
+			}
+			return false
+		}
+	}
+	return func(flow config.GenericMap) bool {
+		if val, found := flow[key]; found {
+			return val == filterValue
+		}
+		return false
+	}
+}
+
+// NotEqual returns the negation of Equal for the same arguments; unlike Equal,
+// its Predicate is true when key is missing from the flow.
+func NotEqual(key string, filterValue any, convertString bool) Predicate {
+	pred := Equal(key, filterValue, convertString)
+	return func(flow config.GenericMap) bool { return !pred(flow) }
+}
+
+// Regex returns a Predicate that is true when the flow contains key and the
+// string form of its value matches filterRegex.
+func Regex(key string, filterRegex *regexp.Regexp) Predicate {
+	return func(flow config.GenericMap) bool {
+		if val, found := flow[key]; found {
+			sVal, ok := val.(string)
+			if !ok {
+				sVal = utils.ConvertToString(val)
+			}
+			return filterRegex.MatchString(sVal)
+		}
+		return false
+	}
+}
+
+// NotRegex returns the negation of Regex for the same arguments; its Predicate
+// is true when key is missing from the flow.
+func NotRegex(key string, filterRegex *regexp.Regexp) Predicate {
+	pred := Regex(key, filterRegex)
+	return func(flow config.GenericMap) bool { return !pred(flow) }
+}
+
+// extractVarLookups returns the "$(name)" placeholders found in value, one
+// match per placeholder: element 0 is the full placeholder, element 1 the
+// variable name. It returns nil when value is not a string or has no placeholder.
+func extractVarLookups(value any) [][]string {
+	// E.g. "$(SrcAddr):$(SrcPort)" yields the names SrcAddr and SrcPort.
+	if sVal, isString := value.(string); isString {
+		if len(sVal) > 0 {
+			return variableExtractor.FindAllStringSubmatch(sVal, -1)
+		}
+	}
+	return nil
+}
+
+// injectVars returns filterValue with every placeholder listed in varLookups
+// replaced by the string form of the matching flow field, or by the empty
+// string when the flow does not contain that field.
+func injectVars(flow config.GenericMap, filterValue string, varLookups [][]string) string {
+	injected := filterValue
+	for _, matchGroup := range varLookups {
+		var value string
+		if rawVal, found := flow[matchGroup[1]]; found {
+			if sVal, ok := rawVal.(string); ok {
+				value = sVal
+			} else {
+				value = utils.ConvertToString(rawVal)
+			}
+		}
+		injected = strings.ReplaceAll(injected, matchGroup[0], value)
+	}
+	return injected
+}
+
+// FromKeepEntry builds the Predicate described by a keep-entry rule. It
+// returns an error when the rule type is not recognized or, for regex rules,
+// when the rule value is not a string holding a valid regular expression.
+func FromKeepEntry(from *api.KeepEntryRule) (Predicate, error) {
+	switch from.Type {
+	case api.KeepEntryIfExists:
+		return Presence(from.KeepEntry.Input), nil
+	case api.KeepEntryIfDoesntExist:
+		return Absence(from.KeepEntry.Input), nil
+	case api.KeepEntryIfEqual:
+		return Equal(from.KeepEntry.Input, from.KeepEntry.Value, true), nil
+	case api.KeepEntryIfNotEqual:
+		return NotEqual(from.KeepEntry.Input, from.KeepEntry.Value, true), nil
+	case api.KeepEntryIfRegexMatch:
+		r, err := compileRegex(from.KeepEntry)
+		if err != nil {
+			return nil, err
+		}
+		return Regex(from.KeepEntry.Input, r), nil
+	case api.KeepEntryIfNotRegexMatch:
+		r, err := compileRegex(from.KeepEntry)
+		if err != nil {
+			return nil, err
+		}
+		return NotRegex(from.KeepEntry.Input, r), nil
+	}
+	return nil, fmt.Errorf("keep entry rule type not recognized: %s", from.Type)
+}
+
+// compileRegex compiles the rule value as a regular expression. It returns an
+// error when the value is not a string or does not compile.
+func compileRegex(from *api.TransformFilterGenericRule) (*regexp.Regexp, error) {
+	s, ok := from.Value.(string)
+	if !ok {
+		return nil, fmt.Errorf("invalid regex keep rule: rule value must be a string [%v]", from)
+	}
+	r, err := regexp.Compile(s)
+	if err != nil {
+		return nil, fmt.Errorf("invalid regex keep rule: cannot compile regex [%w]", err)
+	}
+	return r, nil
+}
diff --git a/vendor/github.com/netobserv/flowlogs-pipeline/pkg/utils/tcp_flags.go b/vendor/github.com/netobserv/flowlogs-pipeline/pkg/utils/tcp_flags.go
new file mode 100644
index 00000000..c07d3f37
--- /dev/null
+++ b/vendor/github.com/netobserv/flowlogs-pipeline/pkg/utils/tcp_flags.go
@@ -0,0 +1,40 @@
+package utils
+
+// tcpFlag pairs one bit of the flags bitfield with its display name.
+type tcpFlag struct {
+	value uint16
+	name  string
+}
+
+// tcpFlags lists the known flags in ascending bit order, which is also the
+// order in which DecodeTCPFlags reports them.
+//
+// NOTE(review): bits 8-10 (SYN_ACK, FIN_ACK, RST_ACK) look like combination
+// markers set by the flow producer rather than TCP header bits - confirm.
+var tcpFlags = []tcpFlag{
+	{value: 1 << 0, name: "FIN"},
+	{value: 1 << 1, name: "SYN"},
+	{value: 1 << 2, name: "RST"},
+	{value: 1 << 3, name: "PSH"},
+	{value: 1 << 4, name: "ACK"},
+	{value: 1 << 5, name: "URG"},
+	{value: 1 << 6, name: "ECE"},
+	{value: 1 << 7, name: "CWR"},
+	{value: 1 << 8, name: "SYN_ACK"},
+	{value: 1 << 9, name: "FIN_ACK"},
+	{value: 1 << 10, name: "RST_ACK"},
+}
+
+// DecodeTCPFlags returns the names of the flags set in bitfield, in ascending
+// bit order. It returns nil when none of the known bits is set.
+func DecodeTCPFlags(bitfield uint16) []string {
+	var names []string
+	for i := range tcpFlags {
+		// Skip flags whose bit is clear in the bitfield.
+		if bitfield&tcpFlags[i].value == 0 {
+			continue
+		}
+		names = append(names, tcpFlags[i].name)
+	}
+	return names
+}
diff --git a/vendor/github.com/rs/xid/.gitignore b/vendor/github.com/rs/xid/.gitignore
new file mode 100644
index 00000000..81be9277
--- /dev/null
+++ b/vendor/github.com/rs/xid/.gitignore
@@ -0,0 +1,3 @@
+/.idea
+/.vscode
+.DS_Store
\ No newline at end of file
diff --git a/vendor/github.com/rs/xid/README.md b/vendor/github.com/rs/xid/README.md
index 974e67d2..1bf45bd1 100644
--- a/vendor/github.com/rs/xid/README.md
+++ b/vendor/github.com/rs/xid/README.md
@@ -4,7 +4,7 @@
 
 Package xid is a globally unique id generator library, ready to safely be used directly in your server code.
 
-Xid uses the Mongo Object ID algorithm to generate globally unique ids with a different serialization (base64) to make it shorter when transported as a string:
+Xid uses the Mongo Object ID algorithm to generate globally unique ids with a different serialization ([base32hex](https://datatracker.ietf.org/doc/html/rfc4648#page-10)) to make it shorter when transported as a string:
 https://docs.mongodb.org/manual/reference/object-id/
 
 - 4-byte value representing the seconds since the Unix epoch,
@@ -13,7 +13,7 @@ https://docs.mongodb.org/manual/reference/object-id/
 - 3-byte counter, starting with a random value.
 
 The binary representation of the id is compatible with Mongo 12 bytes Object IDs.
-The string representation is using base32 hex (w/o padding) for better space efficiency
+The string representation is using [base32hex](https://datatracker.ietf.org/doc/html/rfc4648#page-10) (w/o padding) for better space efficiency
 when stored in that form (20 bytes). The hex variant of base32 is used to retain the
 sortable property of the id.
 
@@ -71,8 +71,10 @@ References:
 - Java port by [0xShamil](https://github.com/0xShamil/): https://github.com/0xShamil/java-xid
 - Dart port by [Peter Bwire](https://github.com/pitabwire): https://pub.dev/packages/xid
 - PostgreSQL port by [Rasmus Holm](https://github.com/crholm): https://github.com/modfin/pg-xid
-- Swift port by [Uditha Atukorala](https://github.com/uditha-atukorala): https://github.com/uditha-atukorala/swift-xid
-- C++ port by [Uditha Atukorala](https://github.com/uditha-atukorala): https://github.com/uditha-atukorala/libxid
+- Swift port by [Uditha Atukorala](https://github.com/uatuko): https://github.com/uatuko/swift-xid
+- C++ port by [Uditha Atukorala](https://github.com/uatuko): https://github.com/uatuko/libxid
+- Typescript & Javascript port by [Yiwen AI](https://github.com/yiwen-ai): https://github.com/yiwen-ai/xid-ts
+- Gleam port by [Alexandre Del Vecchio](https://github.com/defgenx): https://github.com/defgenx/gxid
 
 ## Install
 
diff --git a/vendor/github.com/rs/xid/hostid_darwin.go b/vendor/github.com/rs/xid/hostid_darwin.go
index 08351ff7..17351563 100644
--- a/vendor/github.com/rs/xid/hostid_darwin.go
+++ b/vendor/github.com/rs/xid/hostid_darwin.go
@@ -2,8 +2,33 @@
 
 package xid
 
-import "syscall"
+import (
+	"errors"
+	"os/exec"
+	"strings"
+)
 
 func readPlatformMachineID() (string, error) {
-	return syscall.Sysctl("kern.uuid")
+	ioreg, err := exec.LookPath("ioreg")
+	if err != nil {
+		return "", err
+	}
+
+	cmd := exec.Command(ioreg, "-rd1", "-c", "IOPlatformExpertDevice")
+	out, err := cmd.CombinedOutput()
+	if err != nil {
+		return "", err
+	}
+
+	for _, line := range strings.Split(string(out), "\n") {
+		if strings.Contains(line, "IOPlatformUUID") {
+			parts := strings.SplitAfter(line, `" = "`)
+			if len(parts) == 2 {
+				uuid := strings.TrimRight(parts[1], `"`)
+				return strings.ToLower(uuid), nil
+			}
+		}
+	}
+
+	return "", errors.New("cannot find host id")
 }
diff --git a/vendor/github.com/rs/xid/hostid_windows.go b/vendor/github.com/rs/xid/hostid_windows.go
index ec2593ee..a4d98ab0 100644
--- a/vendor/github.com/rs/xid/hostid_windows.go
+++ b/vendor/github.com/rs/xid/hostid_windows.go
@@ -11,11 +11,17 @@ import (
 func readPlatformMachineID() (string, error) {
 	// source: https://github.com/shirou/gopsutil/blob/master/host/host_syscall.go
 	var h syscall.Handle
-	err := syscall.RegOpenKeyEx(syscall.HKEY_LOCAL_MACHINE, syscall.StringToUTF16Ptr(`SOFTWARE\Microsoft\Cryptography`), 0, syscall.KEY_READ|syscall.KEY_WOW64_64KEY, &h)
+
+	regKeyCryptoPtr, err := syscall.UTF16PtrFromString(`SOFTWARE\Microsoft\Cryptography`)
+	if err != nil {
+		return "", fmt.Errorf(`error reading registry key "SOFTWARE\Microsoft\Cryptography": %w`, err)
+	}
+
+	err = syscall.RegOpenKeyEx(syscall.HKEY_LOCAL_MACHINE, regKeyCryptoPtr, 0, syscall.KEY_READ|syscall.KEY_WOW64_64KEY, &h)
 	if err != nil {
 		return "", err
 	}
-	defer syscall.RegCloseKey(h)
+	defer func() { _ = syscall.RegCloseKey(h) }()
 
 	const syscallRegBufLen = 74 // len(`{`) + len(`abcdefgh-1234-456789012-123345456671` * 2) + len(`}`) // 2 == bytes/UTF16
 	const uuidLen = 36
@@ -23,9 +29,15 @@ func readPlatformMachineID() (string, error) {
 	var regBuf [syscallRegBufLen]uint16
 	bufLen := uint32(syscallRegBufLen)
 	var valType uint32
-	err = syscall.RegQueryValueEx(h, syscall.StringToUTF16Ptr(`MachineGuid`), nil, &valType, (*byte)(unsafe.Pointer(&regBuf[0])), &bufLen)
+
+	mGuidPtr, err := syscall.UTF16PtrFromString(`MachineGuid`)
 	if err != nil {
-		return "", err
+		return "", fmt.Errorf("error reading machine GUID: %w", err)
+	}
+
+	err = syscall.RegQueryValueEx(h, mGuidPtr, nil, &valType, (*byte)(unsafe.Pointer(&regBuf[0])), &bufLen)
+	if err != nil {
+		return "", fmt.Errorf("error parsing ")
 	}
 
 	hostID := syscall.UTF16ToString(regBuf[:])
diff --git a/vendor/github.com/rs/xid/id.go b/vendor/github.com/rs/xid/id.go
index fcd7a041..e88984d9 100644
--- a/vendor/github.com/rs/xid/id.go
+++ b/vendor/github.com/rs/xid/id.go
@@ -54,7 +54,6 @@ import (
 	"sort"
 	"sync/atomic"
 	"time"
-	"unsafe"
 )
 
 // Code inspired from mgo/bson ObjectId
@@ -172,7 +171,7 @@ func FromString(id string) (ID, error) {
 func (id ID) String() string {
 	text := make([]byte, encodedLen)
 	encode(text, id[:])
-	return *(*string)(unsafe.Pointer(&text))
+	return string(text)
 }
 
 // Encode encodes the id using base32 encoding, writing 20 bytes to dst and return it.
@@ -206,23 +205,23 @@ func encode(dst, id []byte) {
 
 	dst[19] = encoding[(id[11]<<4)&0x1F]
 	dst[18] = encoding[(id[11]>>1)&0x1F]
-	dst[17] = encoding[(id[11]>>6)&0x1F|(id[10]<<2)&0x1F]
+	dst[17] = encoding[(id[11]>>6)|(id[10]<<2)&0x1F]
 	dst[16] = encoding[id[10]>>3]
 	dst[15] = encoding[id[9]&0x1F]
 	dst[14] = encoding[(id[9]>>5)|(id[8]<<3)&0x1F]
 	dst[13] = encoding[(id[8]>>2)&0x1F]
 	dst[12] = encoding[id[8]>>7|(id[7]<<1)&0x1F]
-	dst[11] = encoding[(id[7]>>4)&0x1F|(id[6]<<4)&0x1F]
+	dst[11] = encoding[(id[7]>>4)|(id[6]<<4)&0x1F]
 	dst[10] = encoding[(id[6]>>1)&0x1F]
-	dst[9] = encoding[(id[6]>>6)&0x1F|(id[5]<<2)&0x1F]
+	dst[9] = encoding[(id[6]>>6)|(id[5]<<2)&0x1F]
 	dst[8] = encoding[id[5]>>3]
 	dst[7] = encoding[id[4]&0x1F]
 	dst[6] = encoding[id[4]>>5|(id[3]<<3)&0x1F]
 	dst[5] = encoding[(id[3]>>2)&0x1F]
 	dst[4] = encoding[id[3]>>7|(id[2]<<1)&0x1F]
-	dst[3] = encoding[(id[2]>>4)&0x1F|(id[1]<<4)&0x1F]
+	dst[3] = encoding[(id[2]>>4)|(id[1]<<4)&0x1F]
 	dst[2] = encoding[(id[1]>>1)&0x1F]
-	dst[1] = encoding[(id[1]>>6)&0x1F|(id[0]<<2)&0x1F]
+	dst[1] = encoding[(id[1]>>6)|(id[0]<<2)&0x1F]
 	dst[0] = encoding[id[0]>>3]
 }
 
diff --git a/vendor/go.opentelemetry.io/otel/.gitignore b/vendor/go.opentelemetry.io/otel/.gitignore
index 895c7664..ae8577ef 100644
--- a/vendor/go.opentelemetry.io/otel/.gitignore
+++ b/vendor/go.opentelemetry.io/otel/.gitignore
@@ -12,11 +12,3 @@ go.work
 go.work.sum
 
 gen/
-
-/example/dice/dice
-/example/namedtracer/namedtracer
-/example/otel-collector/otel-collector
-/example/opencensus/opencensus
-/example/passthrough/passthrough
-/example/prometheus/prometheus
-/example/zipkin/zipkin
diff --git a/vendor/go.opentelemetry.io/otel/.golangci.yml b/vendor/go.opentelemetry.io/otel/.golangci.yml
index d9abe194..dbfb2a16 100644
--- a/vendor/go.opentelemetry.io/otel/.golangci.yml
+++ b/vendor/go.opentelemetry.io/otel/.golangci.yml
@@ -25,6 +25,7 @@ linters:
     - revive
     - staticcheck
     - tenv
+    - testifylint
     - typecheck
     - unconvert
     - unused
@@ -64,12 +65,12 @@ issues:
     - path: _test\.go
       linters:
         - gosec
-    # Igonoring gosec G404: Use of weak random number generator (math/rand instead of crypto/rand)
+    # Ignoring gosec G404: Use of weak random number generator (math/rand instead of crypto/rand)
     # as we commonly use it in tests and examples.
     - text: "G404:"
       linters:
         - gosec
-    # Igonoring gosec G402: TLS MinVersion too low
+    # Ignoring gosec G402: TLS MinVersion too low
     # as the https://pkg.go.dev/crypto/tls#Config handles MinVersion default well.
     - text: "G402: TLS MinVersion too low."
       linters:
@@ -126,8 +127,6 @@ linters-settings:
           - "**/metric/**/*.go"
           - "**/bridge/*.go"
           - "**/bridge/**/*.go"
-          - "**/example/*.go"
-          - "**/example/**/*.go"
           - "**/trace/*.go"
           - "**/trace/**/*.go"
           - "**/log/*.go"
@@ -302,3 +301,9 @@ linters-settings:
       # https://github.com/mgechev/revive/blob/master/RULES_DESCRIPTIONS.md#waitgroup-by-value
       - name: waitgroup-by-value
         disabled: false
+  testifylint:
+    enable-all: true
+    disable:
+      - float-compare
+      - go-require
+      - require-error
diff --git a/vendor/go.opentelemetry.io/otel/CHANGELOG.md b/vendor/go.opentelemetry.io/otel/CHANGELOG.md
index 6107c17b..8f68dbd0 100644
--- a/vendor/go.opentelemetry.io/otel/CHANGELOG.md
+++ b/vendor/go.opentelemetry.io/otel/CHANGELOG.md
@@ -11,6 +11,100 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm
 <!-- Released section -->
 <!-- Don't change this section unless doing release -->
 
+## [1.32.0/0.54.0/0.8.0/0.0.11] 2024-11-08
+
+### Added
+
+- Add `go.opentelemetry.io/otel/sdk/metric/exemplar.AlwaysOffFilter`, which can be used to disable exemplar recording. (#5850)
+- Add `go.opentelemetry.io/otel/sdk/metric.WithExemplarFilter`, which can be used to configure the exemplar filter used by the metrics SDK. (#5850)
+- Add `ExemplarReservoirProviderSelector` and `DefaultExemplarReservoirProviderSelector` to `go.opentelemetry.io/otel/sdk/metric`, which defines the exemplar reservoir to use based on the aggregation of the metric. (#5861)
+- Add `ExemplarReservoirProviderSelector` to `go.opentelemetry.io/otel/sdk/metric.Stream` to allow using views to configure the exemplar reservoir to use for a metric. (#5861)
+- Add `ReservoirProvider`, `HistogramReservoirProvider` and `FixedSizeReservoirProvider` to `go.opentelemetry.io/otel/sdk/metric/exemplar` to make it convenient to use providers of Reservoirs. (#5861)
+- The `go.opentelemetry.io/otel/semconv/v1.27.0` package.
+  The package contains semantic conventions from the `v1.27.0` version of the OpenTelemetry Semantic Conventions. (#5894)
+- Add `Attributes attribute.Set` field to `Scope` in `go.opentelemetry.io/otel/sdk/instrumentation`. (#5903)
+- Add `Attributes attribute.Set` field to `ScopeRecords` in `go.opentelemetry.io/otel/log/logtest`. (#5927)
+- `go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc` adds instrumentation scope attributes. (#5934)
+- `go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp` adds instrumentation scope attributes. (#5934)
+- `go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc` adds instrumentation scope attributes. (#5935)
+- `go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp` adds instrumentation scope attributes. (#5935)
+- `go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc` adds instrumentation scope attributes. (#5933)
+- `go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp` adds instrumentation scope attributes. (#5933)
+- `go.opentelemetry.io/otel/exporters/prometheus` adds instrumentation scope attributes in `otel_scope_info` metric as labels. (#5932)
+
+### Changed
+
+- Support scope attributes and make them as identifying for `Tracer` in `go.opentelemetry.io/otel` and `go.opentelemetry.io/otel/sdk/trace`. (#5924)
+- Support scope attributes and make them as identifying for `Meter` in `go.opentelemetry.io/otel` and `go.opentelemetry.io/otel/sdk/metric`. (#5926)
+- Support scope attributes and make them as identifying for `Logger` in `go.opentelemetry.io/otel` and `go.opentelemetry.io/otel/sdk/log`. (#5925)
+- Make schema URL and scope attributes as identifying for `Tracer` in `go.opentelemetry.io/otel/bridge/opentracing`. (#5931)
+- Clear unneeded slice elements to allow GC to collect the objects in `go.opentelemetry.io/otel/sdk/metric` and `go.opentelemetry.io/otel/sdk/trace`. (#5804)
+
+### Fixed
+
+- Global MeterProvider registration unwraps global instrument Observers, the undocumented Unwrap() methods are now private. (#5881)
+- `go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc` now keeps the metadata already present in the context when `WithHeaders` is used. (#5892)
+- `go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc` now keeps the metadata already present in the context when `WithHeaders` is used. (#5911)
+- `go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc` now keeps the metadata already present in the context when `WithHeaders` is used. (#5915)
+- Fix `go.opentelemetry.io/otel/exporters/prometheus` trying to add exemplars to Gauge metrics, which is unsupported. (#5912)
+- Fix `WithEndpointURL` to always use a secure connection when an https URL is passed in `go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc`. (#5944)
+- Fix `WithEndpointURL` to always use a secure connection when an https URL is passed in `go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp`. (#5944)
+- Fix `WithEndpointURL` to always use a secure connection when an https URL is passed in `go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc`. (#5944)
+- Fix `WithEndpointURL` to always use a secure connection when an https URL is passed in `go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp`. (#5944)
+- Fix incorrect metrics generated from callbacks when multiple readers are used in `go.opentelemetry.io/otel/sdk/metric`. (#5900)
+
+### Removed
+
+- Remove all examples under `go.opentelemetry.io/otel/example` as they are moved to [Contrib repository](https://github.com/open-telemetry/opentelemetry-go-contrib/tree/main/examples). (#5930)
+
+## [1.31.0/0.53.0/0.7.0/0.0.10] 2024-10-11
+
+### Added
+
+- Add `go.opentelemetry.io/otel/sdk/metric/exemplar` package which includes `Exemplar`, `Filter`, `TraceBasedFilter`, `AlwaysOnFilter`, `HistogramReservoir`, `FixedSizeReservoir`, `Reservoir`, `Value` and `ValueType` types. These will be used for configuring the exemplar reservoir for the metrics sdk. (#5747, #5862)
+- Add `WithExportBufferSize` option to log batch processor. (#5877)
+
+### Changed
+
+- Enable exemplars by default in `go.opentelemetry.io/otel/sdk/metric`. Exemplars can be disabled by setting `OTEL_METRICS_EXEMPLAR_FILTER=always_off` (#5778)
+- `Logger.Enabled` in `go.opentelemetry.io/otel/log` now accepts a newly introduced `EnabledParameters` type instead of `Record`. (#5791)
+- `FilterProcessor.Enabled` in `go.opentelemetry.io/otel/sdk/log/internal/x` now accepts `EnabledParameters` instead of `Record`. (#5791)
+- The `Record` type in `go.opentelemetry.io/otel/log` is no longer comparable. (#5847)
+- Performance improvements for the trace SDK `SetAttributes` method in `Span`. (#5864)
+- Reduce memory allocations for the `Event` and `Link` lists in `Span`. (#5858)
+- Performance improvements for the trace SDK `AddEvent`, `AddLink`, `RecordError` and `End` methods in `Span`. (#5874)
+
+### Deprecated
+
+- Deprecate all examples under `go.opentelemetry.io/otel/example` as they are moved to [Contrib repository](https://github.com/open-telemetry/opentelemetry-go-contrib/tree/main/examples). (#5854)
+
+### Fixed
+
+- The race condition for multiple `FixedSize` exemplar reservoirs identified in #5814 is resolved. (#5819)
+- Fix log records duplication in case of heterogeneous resource attributes by correctly mapping each log record to its resource and scope. (#5803)
+- Fix timer channel drain to avoid hanging on Go 1.23. (#5868)
+- Fix delegation for global meter providers, and panic when calling otel.SetMeterProvider. (#5827)
+- Change the `reflect.TypeOf` to use a nil pointer to not allocate on the heap unless necessary. (#5827)
+
+## [1.30.0/0.52.0/0.6.0/0.0.9] 2024-09-09
+
+### Added
+
+- Support `OTEL_EXPORTER_OTLP_LOGS_INSECURE` and `OTEL_EXPORTER_OTLP_INSECURE` environments in `go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploggrpc`. (#5739)
+- The `WithResource` option for `NewMeterProvider` now merges the provided resources with the ones from environment variables. (#5773)
+- The `WithResource` option for `NewLoggerProvider` now merges the provided resources with the ones from environment variables. (#5773)
+- Add UTF-8 support to `go.opentelemetry.io/otel/exporters/prometheus`. (#5755)
+
+### Fixed
+
+- Fix memory leak in the global `MeterProvider` when identical instruments are repeatedly created. (#5754)
+- Fix panic on instruments creation when setting meter provider. (#5758)
+- Fix an issue where `SetMeterProvider` in `go.opentelemetry.io/otel` might miss the delegation for instruments and registries. (#5780)
+
+### Removed
+
+- Drop support for [Go 1.21]. (#5736, #5740, #5800)
+
 ## [1.29.0/0.51.0/0.5.0] 2024-08-23
 
 This release is the last to support [Go 1.21].
@@ -1895,7 +1989,7 @@ with major version 0.
 - Setting error status while recording error with Span from oteltest package. (#1729)
 - The concept of a remote and local Span stored in a context is unified to just the current Span.
   Because of this `"go.opentelemetry.io/otel/trace".RemoteSpanContextFromContext` is removed as it is no longer needed.
-  Instead, `"go.opentelemetry.io/otel/trace".SpanContextFromContex` can be used to return the current Span.
+  Instead, `"go.opentelemetry.io/otel/trace".SpanContextFromContext` can be used to return the current Span.
   If needed, that Span's `SpanContext.IsRemote()` can then be used to determine if it is remote or not. (#1731)
 - The `HasRemoteParent` field of the `"go.opentelemetry.io/otel/sdk/trace".SamplingParameters` is removed.
   This field is redundant to the information returned from the `Remote` method of the `SpanContext` held in the `ParentContext` field. (#1749)
@@ -2469,7 +2563,7 @@ This release migrates the default OpenTelemetry SDK into its own Go module, deco
 - Prometheus exporter will not apply stale updates or forget inactive metrics. (#903)
 - Add test for api.standard `HTTPClientAttributesFromHTTPRequest`. (#905)
 - Bump github.com/golangci/golangci-lint from 1.27.0 to 1.28.1 in /tools. (#901, #913)
-- Update otel-colector example to use the v0.5.0 collector. (#915)
+- Update otel-collector example to use the v0.5.0 collector. (#915)
 - The `grpctrace` instrumentation uses a span name conforming to the OpenTelemetry semantic conventions (does not contain a leading slash (`/`)). (#922)
 - The `grpctrace` instrumentation includes an `rpc.method` attribute now set to the gRPC method name. (#900, #922)
 - The `grpctrace` instrumentation `rpc.service` attribute now contains the package name if one exists.
@@ -3062,7 +3156,10 @@ It contains api and sdk for trace and meter.
 - CircleCI build CI manifest files.
 - CODEOWNERS file to track owners of this project.
 
-[Unreleased]: https://github.com/open-telemetry/opentelemetry-go/compare/v1.29.0...HEAD
+[Unreleased]: https://github.com/open-telemetry/opentelemetry-go/compare/v1.32.0...HEAD
+[1.32.0/0.54.0/0.8.0/0.0.11]: https://github.com/open-telemetry/opentelemetry-go/releases/tag/v1.32.0
+[1.31.0/0.53.0/0.7.0/0.0.10]: https://github.com/open-telemetry/opentelemetry-go/releases/tag/v1.31.0
+[1.30.0/0.52.0/0.6.0/0.0.9]: https://github.com/open-telemetry/opentelemetry-go/releases/tag/v1.30.0
 [1.29.0/0.51.0/0.5.0]: https://github.com/open-telemetry/opentelemetry-go/releases/tag/v1.29.0
 [1.28.0/0.50.0/0.4.0]: https://github.com/open-telemetry/opentelemetry-go/releases/tag/v1.28.0
 [1.27.0/0.49.0/0.3.0]: https://github.com/open-telemetry/opentelemetry-go/releases/tag/v1.27.0
diff --git a/vendor/go.opentelemetry.io/otel/CODEOWNERS b/vendor/go.opentelemetry.io/otel/CODEOWNERS
index 5904bb70..945a07d2 100644
--- a/vendor/go.opentelemetry.io/otel/CODEOWNERS
+++ b/vendor/go.opentelemetry.io/otel/CODEOWNERS
@@ -12,6 +12,6 @@
 #  https://help.github.com/en/articles/about-code-owners
 #
 
-* @MrAlias @XSAM @dashpole @MadVikingGod @pellared @hanyuancheung @dmathieu
+* @MrAlias @XSAM @dashpole @pellared @dmathieu
 
-CODEOWNERS @MrAlias @MadVikingGod @pellared @dashpole @XSAM @dmathieu
+CODEOWNERS @MrAlias @pellared @dashpole @XSAM @dmathieu
diff --git a/vendor/go.opentelemetry.io/otel/CONTRIBUTING.md b/vendor/go.opentelemetry.io/otel/CONTRIBUTING.md
index b7402576..22a2e9db 100644
--- a/vendor/go.opentelemetry.io/otel/CONTRIBUTING.md
+++ b/vendor/go.opentelemetry.io/otel/CONTRIBUTING.md
@@ -578,7 +578,10 @@ See also:
 The tests should never leak goroutines.
 
 Use the term `ConcurrentSafe` in the test name when it aims to verify the
-absence of race conditions.
+absence of race conditions. The top-level tests with this term will be run
+many times in the `test-concurrent-safe` CI job to increase the chance of
+catching concurrency issues. This does not apply to subtests when this term
+is not in their root name.
 
 ### Internal packages
 
@@ -626,13 +629,14 @@ should be canceled.
 
 ## Approvers and Maintainers
 
-### Approvers
+### Triagers
+
+- [Cheng-Zhen Yang](https://github.com/scorpionknifes), Independent
 
-- [Chester Cheung](https://github.com/hanyuancheung), Tencent
+### Approvers
 
 ### Maintainers
 
-- [Aaron Clawson](https://github.com/MadVikingGod), LightStep
 - [Damien Mathieu](https://github.com/dmathieu), Elastic
 - [David Ashpole](https://github.com/dashpole), Google
 - [Robert Pająk](https://github.com/pellared), Splunk
@@ -641,11 +645,13 @@ should be canceled.
 
 ### Emeritus
 
-- [Liz Fong-Jones](https://github.com/lizthegrey), Honeycomb
-- [Gustavo Silva Paiva](https://github.com/paivagustavo), LightStep
-- [Josh MacDonald](https://github.com/jmacd), LightStep
-- [Anthony Mirabella](https://github.com/Aneurysm9), AWS
-- [Evan Torrie](https://github.com/evantorrie), Yahoo
+- [Aaron Clawson](https://github.com/MadVikingGod)
+- [Anthony Mirabella](https://github.com/Aneurysm9)
+- [Chester Cheung](https://github.com/hanyuancheung)
+- [Evan Torrie](https://github.com/evantorrie)
+- [Gustavo Silva Paiva](https://github.com/paivagustavo)
+- [Josh MacDonald](https://github.com/jmacd)
+- [Liz Fong-Jones](https://github.com/lizthegrey)
 
 ### Become an Approver or a Maintainer
 
diff --git a/vendor/go.opentelemetry.io/otel/Makefile b/vendor/go.opentelemetry.io/otel/Makefile
index 070b1e57..b8292a4f 100644
--- a/vendor/go.opentelemetry.io/otel/Makefile
+++ b/vendor/go.opentelemetry.io/otel/Makefile
@@ -54,9 +54,6 @@ $(TOOLS)/stringer: PACKAGE=golang.org/x/tools/cmd/stringer
 PORTO = $(TOOLS)/porto
 $(TOOLS)/porto: PACKAGE=github.com/jcchavezs/porto/cmd/porto
 
-GOJQ = $(TOOLS)/gojq
-$(TOOLS)/gojq: PACKAGE=github.com/itchyny/gojq/cmd/gojq
-
 GOTMPL = $(TOOLS)/gotmpl
 $(GOTMPL): PACKAGE=go.opentelemetry.io/build-tools/gotmpl
 
@@ -67,7 +64,7 @@ GOVULNCHECK = $(TOOLS)/govulncheck
 $(TOOLS)/govulncheck: PACKAGE=golang.org/x/vuln/cmd/govulncheck
 
 .PHONY: tools
-tools: $(CROSSLINK) $(GOLANGCI_LINT) $(MISSPELL) $(GOCOVMERGE) $(STRINGER) $(PORTO) $(GOJQ) $(SEMCONVGEN) $(MULTIMOD) $(SEMCONVKIT) $(GOTMPL) $(GORELEASE)
+tools: $(CROSSLINK) $(GOLANGCI_LINT) $(MISSPELL) $(GOCOVMERGE) $(STRINGER) $(PORTO) $(SEMCONVGEN) $(MULTIMOD) $(SEMCONVKIT) $(GOTMPL) $(GORELEASE)
 
 # Virtualized python tools via docker
 
@@ -145,12 +142,14 @@ build-tests/%:
 
 # Tests
 
-TEST_TARGETS := test-default test-bench test-short test-verbose test-race
+TEST_TARGETS := test-default test-bench test-short test-verbose test-race test-concurrent-safe
 .PHONY: $(TEST_TARGETS) test
 test-default test-race: ARGS=-race
 test-bench:   ARGS=-run=xxxxxMatchNothingxxxxx -test.benchtime=1ms -bench=.
 test-short:   ARGS=-short
 test-verbose: ARGS=-v -race
+test-concurrent-safe: ARGS=-run=ConcurrentSafe -count=100 -race
+test-concurrent-safe: TIMEOUT=120
 $(TEST_TARGETS): test
 test: $(OTEL_GO_MOD_DIRS:%=test/%)
 test/%: DIR=$*
@@ -261,7 +260,7 @@ SEMCONVPKG ?= "semconv/"
 semconv-generate: $(SEMCONVGEN) $(SEMCONVKIT)
 	[ "$(TAG)" ] || ( echo "TAG unset: missing opentelemetry semantic-conventions tag"; exit 1 )
 	[ "$(OTEL_SEMCONV_REPO)" ] || ( echo "OTEL_SEMCONV_REPO unset: missing path to opentelemetry semantic-conventions repo"; exit 1 )
-	$(SEMCONVGEN) -i "$(OTEL_SEMCONV_REPO)/model/." --only=attribute_group -p conventionType=trace -f attribute_group.go -t "$(SEMCONVPKG)/template.j2" -s "$(TAG)"
+	$(SEMCONVGEN) -i "$(OTEL_SEMCONV_REPO)/model/." --only=attribute_group -p conventionType=trace -f attribute_group.go -z "$(SEMCONVPKG)/capitalizations.txt" -t "$(SEMCONVPKG)/template.j2" -s "$(TAG)"
 	$(SEMCONVGEN) -i "$(OTEL_SEMCONV_REPO)/model/." --only=metric  -f metric.go -t "$(SEMCONVPKG)/metric_template.j2" -s "$(TAG)"
 	$(SEMCONVKIT) -output "$(SEMCONVPKG)/$(TAG)" -tag "$(TAG)"
 
diff --git a/vendor/go.opentelemetry.io/otel/README.md b/vendor/go.opentelemetry.io/otel/README.md
index 657df347..efec2789 100644
--- a/vendor/go.opentelemetry.io/otel/README.md
+++ b/vendor/go.opentelemetry.io/otel/README.md
@@ -51,25 +51,18 @@ Currently, this project supports the following environments.
 |----------|------------|--------------|
 | Ubuntu   | 1.23       | amd64        |
 | Ubuntu   | 1.22       | amd64        |
-| Ubuntu   | 1.21       | amd64        |
 | Ubuntu   | 1.23       | 386          |
 | Ubuntu   | 1.22       | 386          |
-| Ubuntu   | 1.21       | 386          |
 | Linux    | 1.23       | arm64        |
 | Linux    | 1.22       | arm64        |
-| Linux    | 1.21       | arm64        |
 | macOS 13 | 1.23       | amd64        |
 | macOS 13 | 1.22       | amd64        |
-| macOS 13 | 1.21       | amd64        |
 | macOS    | 1.23       | arm64        |
 | macOS    | 1.22       | arm64        |
-| macOS    | 1.21       | arm64        |
 | Windows  | 1.23       | amd64        |
 | Windows  | 1.22       | amd64        |
-| Windows  | 1.21       | amd64        |
 | Windows  | 1.23       | 386          |
 | Windows  | 1.22       | 386          |
-| Windows  | 1.21       | 386          |
 
 While this project should work for other systems, no compatibility guarantees
 are made for those systems currently.
@@ -96,8 +89,8 @@ If you need to extend the telemetry an instrumentation library provides or want
 to build your own instrumentation for your application directly you will need
 to use the
 [Go otel](https://pkg.go.dev/go.opentelemetry.io/otel)
-package. The included [examples](./example/) are a good way to see some
-practical uses of this process.
+package. The [examples](https://github.com/open-telemetry/opentelemetry-go-contrib/tree/main/examples)
+are a good way to see some practical uses of this process.
 
 ### Export
 
diff --git a/vendor/go.opentelemetry.io/otel/RELEASING.md b/vendor/go.opentelemetry.io/otel/RELEASING.md
index 59992984..ffa9b612 100644
--- a/vendor/go.opentelemetry.io/otel/RELEASING.md
+++ b/vendor/go.opentelemetry.io/otel/RELEASING.md
@@ -111,17 +111,6 @@ It is critical you make sure the version you push upstream is correct.
 Finally create a Release for the new `<new tag>` on GitHub.
 The release body should include all the release notes from the Changelog for this release.
 
-## Verify Examples
-
-After releasing verify that examples build outside of the repository.
-
-```
-./verify_examples.sh
-```
-
-The script copies examples into a different directory removes any `replace` declarations in `go.mod` and builds them.
-This ensures they build with the published release, not the local copy.
-
 ## Post-Release
 
 ### Contrib Repository
diff --git a/vendor/go.opentelemetry.io/otel/attribute/set.go b/vendor/go.opentelemetry.io/otel/attribute/set.go
index bff9c7fd..6cbefcea 100644
--- a/vendor/go.opentelemetry.io/otel/attribute/set.go
+++ b/vendor/go.opentelemetry.io/otel/attribute/set.go
@@ -347,45 +347,25 @@ func computeDistinct(kvs []KeyValue) Distinct {
 func computeDistinctFixed(kvs []KeyValue) interface{} {
 	switch len(kvs) {
 	case 1:
-		ptr := new([1]KeyValue)
-		copy((*ptr)[:], kvs)
-		return *ptr
+		return [1]KeyValue(kvs)
 	case 2:
-		ptr := new([2]KeyValue)
-		copy((*ptr)[:], kvs)
-		return *ptr
+		return [2]KeyValue(kvs)
 	case 3:
-		ptr := new([3]KeyValue)
-		copy((*ptr)[:], kvs)
-		return *ptr
+		return [3]KeyValue(kvs)
 	case 4:
-		ptr := new([4]KeyValue)
-		copy((*ptr)[:], kvs)
-		return *ptr
+		return [4]KeyValue(kvs)
 	case 5:
-		ptr := new([5]KeyValue)
-		copy((*ptr)[:], kvs)
-		return *ptr
+		return [5]KeyValue(kvs)
 	case 6:
-		ptr := new([6]KeyValue)
-		copy((*ptr)[:], kvs)
-		return *ptr
+		return [6]KeyValue(kvs)
 	case 7:
-		ptr := new([7]KeyValue)
-		copy((*ptr)[:], kvs)
-		return *ptr
+		return [7]KeyValue(kvs)
 	case 8:
-		ptr := new([8]KeyValue)
-		copy((*ptr)[:], kvs)
-		return *ptr
+		return [8]KeyValue(kvs)
 	case 9:
-		ptr := new([9]KeyValue)
-		copy((*ptr)[:], kvs)
-		return *ptr
+		return [9]KeyValue(kvs)
 	case 10:
-		ptr := new([10]KeyValue)
-		copy((*ptr)[:], kvs)
-		return *ptr
+		return [10]KeyValue(kvs)
 	default:
 		return nil
 	}
diff --git a/vendor/go.opentelemetry.io/otel/baggage/baggage.go b/vendor/go.opentelemetry.io/otel/baggage/baggage.go
index b3569e95..36f53670 100644
--- a/vendor/go.opentelemetry.io/otel/baggage/baggage.go
+++ b/vendor/go.opentelemetry.io/otel/baggage/baggage.go
@@ -50,7 +50,7 @@ type Property struct {
 // component boundaries may impose their own restrictions on Property key.
 // For example, the W3C Baggage specification restricts the Property keys to strings that
 // satisfy the token definition from RFC7230, Section 3.2.6.
-// For maximum compatibility, alpha-numeric value are strongly recommended to be used as Property key.
+// For maximum compatibility, alphanumeric values are strongly recommended for use as Property keys.
 func NewKeyProperty(key string) (Property, error) {
 	if !validateBaggageName(key) {
 		return newInvalidProperty(), fmt.Errorf("%w: %q", errInvalidKey, key)
@@ -90,7 +90,7 @@ func NewKeyValueProperty(key, value string) (Property, error) {
 // component boundaries may impose their own restrictions on Property key.
 // For example, the W3C Baggage specification restricts the Property keys to strings that
 // satisfy the token definition from RFC7230, Section 3.2.6.
-// For maximum compatibility, alpha-numeric value are strongly recommended to be used as Property key.
+// For maximum compatibility, alphanumeric values are strongly recommended for use as Property keys.
 func NewKeyValuePropertyRaw(key, value string) (Property, error) {
 	if !validateBaggageName(key) {
 		return newInvalidProperty(), fmt.Errorf("%w: %q", errInvalidKey, key)
@@ -287,7 +287,7 @@ func NewMember(key, value string, props ...Property) (Member, error) {
 // component boundaries may impose their own restrictions on baggage key.
 // For example, the W3C Baggage specification restricts the baggage keys to strings that
 // satisfy the token definition from RFC7230, Section 3.2.6.
-// For maximum compatibility, alpha-numeric value are strongly recommended to be used as baggage key.
+// For maximum compatibility, alphanumeric values are strongly recommended for use as baggage keys.
 func NewMemberRaw(key, value string, props ...Property) (Member, error) {
 	m := Member{
 		key:        key,
diff --git a/vendor/go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc/client.go b/vendor/go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc/client.go
index 428cfea2..e0fa0570 100644
--- a/vendor/go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc/client.go
+++ b/vendor/go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc/client.go
@@ -155,7 +155,12 @@ func (c *client) exportContext(parent context.Context) (context.Context, context
 	}
 
 	if c.metadata.Len() > 0 {
-		ctx = metadata.NewOutgoingContext(ctx, c.metadata)
+		md := c.metadata
+		if outMD, ok := metadata.FromOutgoingContext(ctx); ok {
+			md = metadata.Join(md, outMD)
+		}
+
+		ctx = metadata.NewOutgoingContext(ctx, md)
 	}
 
 	return ctx, cancel
diff --git a/vendor/go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc/internal/oconf/options.go b/vendor/go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc/internal/oconf/options.go
index b6ed9a2b..c016b4db 100644
--- a/vendor/go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc/internal/oconf/options.go
+++ b/vendor/go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc/internal/oconf/options.go
@@ -139,7 +139,7 @@ func NewGRPCConfig(opts ...GRPCOption) Config {
 	if cfg.ServiceConfig != "" {
 		cfg.DialOptions = append(cfg.DialOptions, grpc.WithDefaultServiceConfig(cfg.ServiceConfig))
 	}
-	// Priroritize GRPCCredentials over Insecure (passing both is an error).
+	// Prioritize GRPCCredentials over Insecure (passing both is an error).
 	if cfg.Metrics.GRPCCredentials != nil {
 		cfg.DialOptions = append(cfg.DialOptions, grpc.WithTransportCredentials(cfg.Metrics.GRPCCredentials))
 	} else if cfg.Metrics.Insecure {
@@ -287,9 +287,7 @@ func WithEndpointURL(v string) GenericOption {
 
 		cfg.Metrics.Endpoint = u.Host
 		cfg.Metrics.URLPath = u.Path
-		if u.Scheme != "https" {
-			cfg.Metrics.Insecure = true
-		}
+		cfg.Metrics.Insecure = u.Scheme != "https"
 
 		return cfg
 	})
diff --git a/vendor/go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc/internal/oconf/tls.go b/vendor/go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc/internal/oconf/tls.go
index 0229ac80..03e7fbcd 100644
--- a/vendor/go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc/internal/oconf/tls.go
+++ b/vendor/go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc/internal/oconf/tls.go
@@ -14,7 +14,7 @@ import (
 )
 
 // ReadTLSConfigFromFile reads a PEM certificate file and creates
-// a tls.Config that will use this certifate to verify a server certificate.
+// a tls.Config that will use this certificate to verify a server certificate.
 func ReadTLSConfigFromFile(path string) (*tls.Config, error) {
 	b, err := os.ReadFile(path)
 	if err != nil {
diff --git a/vendor/go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc/internal/transform/metricdata.go b/vendor/go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc/internal/transform/metricdata.go
index 975e3b7a..abf7f021 100644
--- a/vendor/go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc/internal/transform/metricdata.go
+++ b/vendor/go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc/internal/transform/metricdata.go
@@ -46,8 +46,9 @@ func ScopeMetrics(sms []metricdata.ScopeMetrics) ([]*mpb.ScopeMetrics, error) {
 
 		out = append(out, &mpb.ScopeMetrics{
 			Scope: &cpb.InstrumentationScope{
-				Name:    sm.Scope.Name,
-				Version: sm.Scope.Version,
+				Name:       sm.Scope.Name,
+				Version:    sm.Scope.Version,
+				Attributes: AttrIter(sm.Scope.Attributes.Iter()),
 			},
 			Metrics:   ms,
 			SchemaUrl: sm.Scope.SchemaURL,
@@ -83,13 +84,13 @@ func metric(m metricdata.Metrics) (*mpb.Metric, error) {
 	}
 	switch a := m.Data.(type) {
 	case metricdata.Gauge[int64]:
-		out.Data = Gauge[int64](a)
+		out.Data = Gauge(a)
 	case metricdata.Gauge[float64]:
-		out.Data = Gauge[float64](a)
+		out.Data = Gauge(a)
 	case metricdata.Sum[int64]:
-		out.Data, err = Sum[int64](a)
+		out.Data, err = Sum(a)
 	case metricdata.Sum[float64]:
-		out.Data, err = Sum[float64](a)
+		out.Data, err = Sum(a)
 	case metricdata.Histogram[int64]:
 		out.Data, err = Histogram(a)
 	case metricdata.Histogram[float64]:
@@ -279,10 +280,7 @@ func Temporality(t metricdata.Temporality) (mpb.AggregationTemporality, error) {
 // timeUnixNano on the zero Time returns 0.
 // The result does not depend on the location associated with t.
 func timeUnixNano(t time.Time) uint64 {
-	if t.IsZero() {
-		return 0
-	}
-	return uint64(t.UnixNano())
+	return uint64(max(0, t.UnixNano())) // nolint:gosec // Overflow checked.
 }
 
 // Exemplars returns a slice of OTLP Exemplars generated from exemplars.
diff --git a/vendor/go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc/version.go b/vendor/go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc/version.go
index 1046eb59..19b789b8 100644
--- a/vendor/go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc/version.go
+++ b/vendor/go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc/version.go
@@ -5,5 +5,5 @@ package otlpmetricgrpc // import "go.opentelemetry.io/otel/exporters/otlp/otlpme
 
 // Version is the current release version of the OpenTelemetry OTLP over gRPC metrics exporter in use.
 func Version() string {
-	return "1.29.0"
+	return "1.32.0"
 }
diff --git a/vendor/go.opentelemetry.io/otel/internal/global/instruments.go b/vendor/go.opentelemetry.io/otel/internal/global/instruments.go
index 3a0cc42f..ae92a425 100644
--- a/vendor/go.opentelemetry.io/otel/internal/global/instruments.go
+++ b/vendor/go.opentelemetry.io/otel/internal/global/instruments.go
@@ -13,7 +13,7 @@ import (
 
 // unwrapper unwraps to return the underlying instrument implementation.
 type unwrapper interface {
-	Unwrap() metric.Observable
+	unwrap() metric.Observable
 }
 
 type afCounter struct {
@@ -40,7 +40,7 @@ func (i *afCounter) setDelegate(m metric.Meter) {
 	i.delegate.Store(ctr)
 }
 
-func (i *afCounter) Unwrap() metric.Observable {
+func (i *afCounter) unwrap() metric.Observable {
 	if ctr := i.delegate.Load(); ctr != nil {
 		return ctr.(metric.Float64ObservableCounter)
 	}
@@ -71,7 +71,7 @@ func (i *afUpDownCounter) setDelegate(m metric.Meter) {
 	i.delegate.Store(ctr)
 }
 
-func (i *afUpDownCounter) Unwrap() metric.Observable {
+func (i *afUpDownCounter) unwrap() metric.Observable {
 	if ctr := i.delegate.Load(); ctr != nil {
 		return ctr.(metric.Float64ObservableUpDownCounter)
 	}
@@ -102,7 +102,7 @@ func (i *afGauge) setDelegate(m metric.Meter) {
 	i.delegate.Store(ctr)
 }
 
-func (i *afGauge) Unwrap() metric.Observable {
+func (i *afGauge) unwrap() metric.Observable {
 	if ctr := i.delegate.Load(); ctr != nil {
 		return ctr.(metric.Float64ObservableGauge)
 	}
@@ -133,7 +133,7 @@ func (i *aiCounter) setDelegate(m metric.Meter) {
 	i.delegate.Store(ctr)
 }
 
-func (i *aiCounter) Unwrap() metric.Observable {
+func (i *aiCounter) unwrap() metric.Observable {
 	if ctr := i.delegate.Load(); ctr != nil {
 		return ctr.(metric.Int64ObservableCounter)
 	}
@@ -164,7 +164,7 @@ func (i *aiUpDownCounter) setDelegate(m metric.Meter) {
 	i.delegate.Store(ctr)
 }
 
-func (i *aiUpDownCounter) Unwrap() metric.Observable {
+func (i *aiUpDownCounter) unwrap() metric.Observable {
 	if ctr := i.delegate.Load(); ctr != nil {
 		return ctr.(metric.Int64ObservableUpDownCounter)
 	}
@@ -195,7 +195,7 @@ func (i *aiGauge) setDelegate(m metric.Meter) {
 	i.delegate.Store(ctr)
 }
 
-func (i *aiGauge) Unwrap() metric.Observable {
+func (i *aiGauge) unwrap() metric.Observable {
 	if ctr := i.delegate.Load(); ctr != nil {
 		return ctr.(metric.Int64ObservableGauge)
 	}
diff --git a/vendor/go.opentelemetry.io/otel/internal/global/meter.go b/vendor/go.opentelemetry.io/otel/internal/global/meter.go
index cfd1df9b..a6acd8dc 100644
--- a/vendor/go.opentelemetry.io/otel/internal/global/meter.go
+++ b/vendor/go.opentelemetry.io/otel/internal/global/meter.go
@@ -5,8 +5,9 @@ package global // import "go.opentelemetry.io/otel/internal/global"
 
 import (
 	"container/list"
+	"context"
+	"reflect"
 	"sync"
-	"sync/atomic"
 
 	"go.opentelemetry.io/otel/metric"
 	"go.opentelemetry.io/otel/metric/embedded"
@@ -66,6 +67,7 @@ func (p *meterProvider) Meter(name string, opts ...metric.MeterOption) metric.Me
 		name:    name,
 		version: c.InstrumentationVersion(),
 		schema:  c.SchemaURL(),
+		attrs:   c.InstrumentationAttributes(),
 	}
 
 	if p.meters == nil {
@@ -76,7 +78,7 @@ func (p *meterProvider) Meter(name string, opts ...metric.MeterOption) metric.Me
 		return val
 	}
 
-	t := &meter{name: name, opts: opts}
+	t := &meter{name: name, opts: opts, instruments: make(map[instID]delegatedInstrument)}
 	p.meters[key] = t
 	return t
 }
@@ -92,17 +94,29 @@ type meter struct {
 	opts []metric.MeterOption
 
 	mtx         sync.Mutex
-	instruments []delegatedInstrument
+	instruments map[instID]delegatedInstrument
 
 	registry list.List
 
-	delegate atomic.Value // metric.Meter
+	delegate metric.Meter
 }
 
 type delegatedInstrument interface {
 	setDelegate(metric.Meter)
 }
 
+// instID holds the identifying properties of an instrument.
+type instID struct {
+	// name is the name of the stream.
+	name string
+	// description is the description of the stream.
+	description string
+	// kind defines the functional group of the instrument.
+	kind reflect.Type
+	// unit is the unit of the stream.
+	unit string
+}
+
 // setDelegate configures m to delegate all Meter functionality to Meters
 // created by provider.
 //
@@ -110,12 +124,12 @@ type delegatedInstrument interface {
 //
 // It is guaranteed by the caller that this happens only once.
 func (m *meter) setDelegate(provider metric.MeterProvider) {
-	meter := provider.Meter(m.name, m.opts...)
-	m.delegate.Store(meter)
-
 	m.mtx.Lock()
 	defer m.mtx.Unlock()
 
+	meter := provider.Meter(m.name, m.opts...)
+	m.delegate = meter
+
 	for _, inst := range m.instruments {
 		inst.setDelegate(meter)
 	}
@@ -133,169 +147,336 @@ func (m *meter) setDelegate(provider metric.MeterProvider) {
 }
 
 func (m *meter) Int64Counter(name string, options ...metric.Int64CounterOption) (metric.Int64Counter, error) {
-	if del, ok := m.delegate.Load().(metric.Meter); ok {
-		return del.Int64Counter(name, options...)
-	}
 	m.mtx.Lock()
 	defer m.mtx.Unlock()
+
+	if m.delegate != nil {
+		return m.delegate.Int64Counter(name, options...)
+	}
+
+	cfg := metric.NewInt64CounterConfig(options...)
+	id := instID{
+		name:        name,
+		kind:        reflect.TypeOf((*siCounter)(nil)),
+		description: cfg.Description(),
+		unit:        cfg.Unit(),
+	}
+	if f, ok := m.instruments[id]; ok {
+		return f.(metric.Int64Counter), nil
+	}
 	i := &siCounter{name: name, opts: options}
-	m.instruments = append(m.instruments, i)
+	m.instruments[id] = i
 	return i, nil
 }
 
 func (m *meter) Int64UpDownCounter(name string, options ...metric.Int64UpDownCounterOption) (metric.Int64UpDownCounter, error) {
-	if del, ok := m.delegate.Load().(metric.Meter); ok {
-		return del.Int64UpDownCounter(name, options...)
-	}
 	m.mtx.Lock()
 	defer m.mtx.Unlock()
+
+	if m.delegate != nil {
+		return m.delegate.Int64UpDownCounter(name, options...)
+	}
+
+	cfg := metric.NewInt64UpDownCounterConfig(options...)
+	id := instID{
+		name:        name,
+		kind:        reflect.TypeOf((*siUpDownCounter)(nil)),
+		description: cfg.Description(),
+		unit:        cfg.Unit(),
+	}
+	if f, ok := m.instruments[id]; ok {
+		return f.(metric.Int64UpDownCounter), nil
+	}
 	i := &siUpDownCounter{name: name, opts: options}
-	m.instruments = append(m.instruments, i)
+	m.instruments[id] = i
 	return i, nil
 }
 
 func (m *meter) Int64Histogram(name string, options ...metric.Int64HistogramOption) (metric.Int64Histogram, error) {
-	if del, ok := m.delegate.Load().(metric.Meter); ok {
-		return del.Int64Histogram(name, options...)
-	}
 	m.mtx.Lock()
 	defer m.mtx.Unlock()
+
+	if m.delegate != nil {
+		return m.delegate.Int64Histogram(name, options...)
+	}
+
+	cfg := metric.NewInt64HistogramConfig(options...)
+	id := instID{
+		name:        name,
+		kind:        reflect.TypeOf((*siHistogram)(nil)),
+		description: cfg.Description(),
+		unit:        cfg.Unit(),
+	}
+	if f, ok := m.instruments[id]; ok {
+		return f.(metric.Int64Histogram), nil
+	}
 	i := &siHistogram{name: name, opts: options}
-	m.instruments = append(m.instruments, i)
+	m.instruments[id] = i
 	return i, nil
 }
 
 func (m *meter) Int64Gauge(name string, options ...metric.Int64GaugeOption) (metric.Int64Gauge, error) {
-	if del, ok := m.delegate.Load().(metric.Meter); ok {
-		return del.Int64Gauge(name, options...)
-	}
 	m.mtx.Lock()
 	defer m.mtx.Unlock()
+
+	if m.delegate != nil {
+		return m.delegate.Int64Gauge(name, options...)
+	}
+
+	cfg := metric.NewInt64GaugeConfig(options...)
+	id := instID{
+		name:        name,
+		kind:        reflect.TypeOf((*siGauge)(nil)),
+		description: cfg.Description(),
+		unit:        cfg.Unit(),
+	}
+	if f, ok := m.instruments[id]; ok {
+		return f.(metric.Int64Gauge), nil
+	}
 	i := &siGauge{name: name, opts: options}
-	m.instruments = append(m.instruments, i)
+	m.instruments[id] = i
 	return i, nil
 }
 
 func (m *meter) Int64ObservableCounter(name string, options ...metric.Int64ObservableCounterOption) (metric.Int64ObservableCounter, error) {
-	if del, ok := m.delegate.Load().(metric.Meter); ok {
-		return del.Int64ObservableCounter(name, options...)
-	}
 	m.mtx.Lock()
 	defer m.mtx.Unlock()
+
+	if m.delegate != nil {
+		return m.delegate.Int64ObservableCounter(name, options...)
+	}
+
+	cfg := metric.NewInt64ObservableCounterConfig(options...)
+	id := instID{
+		name:        name,
+		kind:        reflect.TypeOf((*aiCounter)(nil)),
+		description: cfg.Description(),
+		unit:        cfg.Unit(),
+	}
+	if f, ok := m.instruments[id]; ok {
+		return f.(metric.Int64ObservableCounter), nil
+	}
 	i := &aiCounter{name: name, opts: options}
-	m.instruments = append(m.instruments, i)
+	m.instruments[id] = i
 	return i, nil
 }
 
 func (m *meter) Int64ObservableUpDownCounter(name string, options ...metric.Int64ObservableUpDownCounterOption) (metric.Int64ObservableUpDownCounter, error) {
-	if del, ok := m.delegate.Load().(metric.Meter); ok {
-		return del.Int64ObservableUpDownCounter(name, options...)
-	}
 	m.mtx.Lock()
 	defer m.mtx.Unlock()
+
+	if m.delegate != nil {
+		return m.delegate.Int64ObservableUpDownCounter(name, options...)
+	}
+
+	cfg := metric.NewInt64ObservableUpDownCounterConfig(options...)
+	id := instID{
+		name:        name,
+		kind:        reflect.TypeOf((*aiUpDownCounter)(nil)),
+		description: cfg.Description(),
+		unit:        cfg.Unit(),
+	}
+	if f, ok := m.instruments[id]; ok {
+		return f.(metric.Int64ObservableUpDownCounter), nil
+	}
 	i := &aiUpDownCounter{name: name, opts: options}
-	m.instruments = append(m.instruments, i)
+	m.instruments[id] = i
 	return i, nil
 }
 
 func (m *meter) Int64ObservableGauge(name string, options ...metric.Int64ObservableGaugeOption) (metric.Int64ObservableGauge, error) {
-	if del, ok := m.delegate.Load().(metric.Meter); ok {
-		return del.Int64ObservableGauge(name, options...)
-	}
 	m.mtx.Lock()
 	defer m.mtx.Unlock()
+
+	if m.delegate != nil {
+		return m.delegate.Int64ObservableGauge(name, options...)
+	}
+
+	cfg := metric.NewInt64ObservableGaugeConfig(options...)
+	id := instID{
+		name:        name,
+		kind:        reflect.TypeOf((*aiGauge)(nil)),
+		description: cfg.Description(),
+		unit:        cfg.Unit(),
+	}
+	if f, ok := m.instruments[id]; ok {
+		return f.(metric.Int64ObservableGauge), nil
+	}
 	i := &aiGauge{name: name, opts: options}
-	m.instruments = append(m.instruments, i)
+	m.instruments[id] = i
 	return i, nil
 }
 
 func (m *meter) Float64Counter(name string, options ...metric.Float64CounterOption) (metric.Float64Counter, error) {
-	if del, ok := m.delegate.Load().(metric.Meter); ok {
-		return del.Float64Counter(name, options...)
-	}
 	m.mtx.Lock()
 	defer m.mtx.Unlock()
+
+	if m.delegate != nil {
+		return m.delegate.Float64Counter(name, options...)
+	}
+
+	cfg := metric.NewFloat64CounterConfig(options...)
+	id := instID{
+		name:        name,
+		kind:        reflect.TypeOf((*sfCounter)(nil)),
+		description: cfg.Description(),
+		unit:        cfg.Unit(),
+	}
+	if f, ok := m.instruments[id]; ok {
+		return f.(metric.Float64Counter), nil
+	}
 	i := &sfCounter{name: name, opts: options}
-	m.instruments = append(m.instruments, i)
+	m.instruments[id] = i
 	return i, nil
 }
 
 func (m *meter) Float64UpDownCounter(name string, options ...metric.Float64UpDownCounterOption) (metric.Float64UpDownCounter, error) {
-	if del, ok := m.delegate.Load().(metric.Meter); ok {
-		return del.Float64UpDownCounter(name, options...)
-	}
 	m.mtx.Lock()
 	defer m.mtx.Unlock()
+
+	if m.delegate != nil {
+		return m.delegate.Float64UpDownCounter(name, options...)
+	}
+
+	cfg := metric.NewFloat64UpDownCounterConfig(options...)
+	id := instID{
+		name:        name,
+		kind:        reflect.TypeOf((*sfUpDownCounter)(nil)),
+		description: cfg.Description(),
+		unit:        cfg.Unit(),
+	}
+	if f, ok := m.instruments[id]; ok {
+		return f.(metric.Float64UpDownCounter), nil
+	}
 	i := &sfUpDownCounter{name: name, opts: options}
-	m.instruments = append(m.instruments, i)
+	m.instruments[id] = i
 	return i, nil
 }
 
 func (m *meter) Float64Histogram(name string, options ...metric.Float64HistogramOption) (metric.Float64Histogram, error) {
-	if del, ok := m.delegate.Load().(metric.Meter); ok {
-		return del.Float64Histogram(name, options...)
-	}
 	m.mtx.Lock()
 	defer m.mtx.Unlock()
+
+	if m.delegate != nil {
+		return m.delegate.Float64Histogram(name, options...)
+	}
+
+	cfg := metric.NewFloat64HistogramConfig(options...)
+	id := instID{
+		name:        name,
+		kind:        reflect.TypeOf((*sfHistogram)(nil)),
+		description: cfg.Description(),
+		unit:        cfg.Unit(),
+	}
+	if f, ok := m.instruments[id]; ok {
+		return f.(metric.Float64Histogram), nil
+	}
 	i := &sfHistogram{name: name, opts: options}
-	m.instruments = append(m.instruments, i)
+	m.instruments[id] = i
 	return i, nil
 }
 
 func (m *meter) Float64Gauge(name string, options ...metric.Float64GaugeOption) (metric.Float64Gauge, error) {
-	if del, ok := m.delegate.Load().(metric.Meter); ok {
-		return del.Float64Gauge(name, options...)
-	}
 	m.mtx.Lock()
 	defer m.mtx.Unlock()
+
+	if m.delegate != nil {
+		return m.delegate.Float64Gauge(name, options...)
+	}
+
+	cfg := metric.NewFloat64GaugeConfig(options...)
+	id := instID{
+		name:        name,
+		kind:        reflect.TypeOf((*sfGauge)(nil)),
+		description: cfg.Description(),
+		unit:        cfg.Unit(),
+	}
+	if f, ok := m.instruments[id]; ok {
+		return f.(metric.Float64Gauge), nil
+	}
 	i := &sfGauge{name: name, opts: options}
-	m.instruments = append(m.instruments, i)
+	m.instruments[id] = i
 	return i, nil
 }
 
 func (m *meter) Float64ObservableCounter(name string, options ...metric.Float64ObservableCounterOption) (metric.Float64ObservableCounter, error) {
-	if del, ok := m.delegate.Load().(metric.Meter); ok {
-		return del.Float64ObservableCounter(name, options...)
-	}
 	m.mtx.Lock()
 	defer m.mtx.Unlock()
+
+	if m.delegate != nil {
+		return m.delegate.Float64ObservableCounter(name, options...)
+	}
+
+	cfg := metric.NewFloat64ObservableCounterConfig(options...)
+	id := instID{
+		name:        name,
+		kind:        reflect.TypeOf((*afCounter)(nil)),
+		description: cfg.Description(),
+		unit:        cfg.Unit(),
+	}
+	if f, ok := m.instruments[id]; ok {
+		return f.(metric.Float64ObservableCounter), nil
+	}
 	i := &afCounter{name: name, opts: options}
-	m.instruments = append(m.instruments, i)
+	m.instruments[id] = i
 	return i, nil
 }
 
 func (m *meter) Float64ObservableUpDownCounter(name string, options ...metric.Float64ObservableUpDownCounterOption) (metric.Float64ObservableUpDownCounter, error) {
-	if del, ok := m.delegate.Load().(metric.Meter); ok {
-		return del.Float64ObservableUpDownCounter(name, options...)
-	}
 	m.mtx.Lock()
 	defer m.mtx.Unlock()
+
+	if m.delegate != nil {
+		return m.delegate.Float64ObservableUpDownCounter(name, options...)
+	}
+
+	cfg := metric.NewFloat64ObservableUpDownCounterConfig(options...)
+	id := instID{
+		name:        name,
+		kind:        reflect.TypeOf((*afUpDownCounter)(nil)),
+		description: cfg.Description(),
+		unit:        cfg.Unit(),
+	}
+	if f, ok := m.instruments[id]; ok {
+		return f.(metric.Float64ObservableUpDownCounter), nil
+	}
 	i := &afUpDownCounter{name: name, opts: options}
-	m.instruments = append(m.instruments, i)
+	m.instruments[id] = i
 	return i, nil
 }
 
 func (m *meter) Float64ObservableGauge(name string, options ...metric.Float64ObservableGaugeOption) (metric.Float64ObservableGauge, error) {
-	if del, ok := m.delegate.Load().(metric.Meter); ok {
-		return del.Float64ObservableGauge(name, options...)
-	}
 	m.mtx.Lock()
 	defer m.mtx.Unlock()
+
+	if m.delegate != nil {
+		return m.delegate.Float64ObservableGauge(name, options...)
+	}
+
+	cfg := metric.NewFloat64ObservableGaugeConfig(options...)
+	id := instID{
+		name:        name,
+		kind:        reflect.TypeOf((*afGauge)(nil)),
+		description: cfg.Description(),
+		unit:        cfg.Unit(),
+	}
+	if f, ok := m.instruments[id]; ok {
+		return f.(metric.Float64ObservableGauge), nil
+	}
 	i := &afGauge{name: name, opts: options}
-	m.instruments = append(m.instruments, i)
+	m.instruments[id] = i
 	return i, nil
 }
 
 // RegisterCallback captures the function that will be called during Collect.
 func (m *meter) RegisterCallback(f metric.Callback, insts ...metric.Observable) (metric.Registration, error) {
-	if del, ok := m.delegate.Load().(metric.Meter); ok {
-		insts = unwrapInstruments(insts)
-		return del.RegisterCallback(f, insts...)
-	}
-
 	m.mtx.Lock()
 	defer m.mtx.Unlock()
 
+	if m.delegate != nil {
+		return m.delegate.RegisterCallback(unwrapCallback(f), unwrapInstruments(insts)...)
+	}
+
 	reg := &registration{instruments: insts, function: f}
 	e := m.registry.PushBack(reg)
 	reg.unreg = func() error {
@@ -307,15 +488,11 @@ func (m *meter) RegisterCallback(f metric.Callback, insts ...metric.Observable)
 	return reg, nil
 }
 
-type wrapped interface {
-	unwrap() metric.Observable
-}
-
 func unwrapInstruments(instruments []metric.Observable) []metric.Observable {
 	out := make([]metric.Observable, 0, len(instruments))
 
 	for _, inst := range instruments {
-		if in, ok := inst.(wrapped); ok {
+		if in, ok := inst.(unwrapper); ok {
 			out = append(out, in.unwrap())
 		} else {
 			out = append(out, inst)
@@ -335,9 +512,61 @@ type registration struct {
 	unregMu sync.Mutex
 }
 
-func (c *registration) setDelegate(m metric.Meter) {
-	insts := unwrapInstruments(c.instruments)
+type unwrapObs struct {
+	embedded.Observer
+	obs metric.Observer
+}
 
+// unwrapFloat64Observable returns an expected metric.Float64Observable after
+// unwrapping the global object.
+func unwrapFloat64Observable(inst metric.Float64Observable) metric.Float64Observable {
+	if unwrapped, ok := inst.(unwrapper); ok {
+		if floatObs, ok := unwrapped.unwrap().(metric.Float64Observable); ok {
+			// Note: if the unwrapped object does not
+			// unwrap as an observable for either of the
+			// predicates here, it means an internal bug in
+			// this package.  We avoid logging an error in
+			// this case, because the SDK has to try its
+			// own type conversion on the object.  The SDK
+			// will see this and be forced to respond with
+			// its own error.
+			//
+			// This code uses a double-nested if statement
+			// to avoid creating a branch that is
+			// impossible to cover.
+			inst = floatObs
+		}
+	}
+	return inst
+}
+
+// unwrapInt64Observable returns an expected metric.Int64Observable after
+// unwrapping the global object.
+func unwrapInt64Observable(inst metric.Int64Observable) metric.Int64Observable {
+	if unwrapped, ok := inst.(unwrapper); ok {
+		if unint, ok := unwrapped.unwrap().(metric.Int64Observable); ok {
+			// See the comment in unwrapFloat64Observable().
+			inst = unint
+		}
+	}
+	return inst
+}
+
+func (uo *unwrapObs) ObserveFloat64(inst metric.Float64Observable, value float64, opts ...metric.ObserveOption) {
+	uo.obs.ObserveFloat64(unwrapFloat64Observable(inst), value, opts...)
+}
+
+func (uo *unwrapObs) ObserveInt64(inst metric.Int64Observable, value int64, opts ...metric.ObserveOption) {
+	uo.obs.ObserveInt64(unwrapInt64Observable(inst), value, opts...)
+}
+
+func unwrapCallback(f metric.Callback) metric.Callback {
+	return func(ctx context.Context, obs metric.Observer) error {
+		return f(ctx, &unwrapObs{obs: obs})
+	}
+}
+
+func (c *registration) setDelegate(m metric.Meter) {
 	c.unregMu.Lock()
 	defer c.unregMu.Unlock()
 
@@ -346,9 +575,10 @@ func (c *registration) setDelegate(m metric.Meter) {
 		return
 	}
 
-	reg, err := m.RegisterCallback(c.function, insts...)
+	reg, err := m.RegisterCallback(unwrapCallback(c.function), unwrapInstruments(c.instruments)...)
 	if err != nil {
 		GetErrorHandler().Handle(err)
+		return
 	}
 
 	c.unreg = reg.Unregister
diff --git a/vendor/go.opentelemetry.io/otel/internal/global/trace.go b/vendor/go.opentelemetry.io/otel/internal/global/trace.go
index e31f442b..ac65262c 100644
--- a/vendor/go.opentelemetry.io/otel/internal/global/trace.go
+++ b/vendor/go.opentelemetry.io/otel/internal/global/trace.go
@@ -87,6 +87,7 @@ func (p *tracerProvider) Tracer(name string, opts ...trace.TracerOption) trace.T
 		name:    name,
 		version: c.InstrumentationVersion(),
 		schema:  c.SchemaURL(),
+		attrs:   c.InstrumentationAttributes(),
 	}
 
 	if p.tracers == nil {
@@ -102,7 +103,12 @@ func (p *tracerProvider) Tracer(name string, opts ...trace.TracerOption) trace.T
 	return t
 }
 
-type il struct{ name, version, schema string }
+type il struct {
+	name    string
+	version string
+	schema  string
+	attrs   attribute.Set
+}
 
 // tracer is a placeholder for a trace.Tracer.
 //
diff --git a/vendor/go.opentelemetry.io/otel/internal/rawhelpers.go b/vendor/go.opentelemetry.io/otel/internal/rawhelpers.go
index 9b1da2c0..b2fe3e41 100644
--- a/vendor/go.opentelemetry.io/otel/internal/rawhelpers.go
+++ b/vendor/go.opentelemetry.io/otel/internal/rawhelpers.go
@@ -20,7 +20,8 @@ func RawToBool(r uint64) bool {
 }
 
 func Int64ToRaw(i int64) uint64 {
-	return uint64(i)
+	// Assumes original was a valid int64 (overflow not checked).
+	return uint64(i) // nolint: gosec
 }
 
 func RawToInt64(r uint64) int64 {
diff --git a/vendor/go.opentelemetry.io/otel/metric/asyncfloat64.go b/vendor/go.opentelemetry.io/otel/metric/asyncfloat64.go
index cf23db77..f8435d8f 100644
--- a/vendor/go.opentelemetry.io/otel/metric/asyncfloat64.go
+++ b/vendor/go.opentelemetry.io/otel/metric/asyncfloat64.go
@@ -213,7 +213,7 @@ type Float64Observer interface {
 }
 
 // Float64Callback is a function registered with a Meter that makes
-// observations for a Float64Observerable instrument it is registered with.
+// observations for a Float64Observable instrument it is registered with.
 // Calls to the Float64Observer record measurement values for the
 // Float64Observable.
 //
diff --git a/vendor/go.opentelemetry.io/otel/metric/asyncint64.go b/vendor/go.opentelemetry.io/otel/metric/asyncint64.go
index c82ba532..e079aaef 100644
--- a/vendor/go.opentelemetry.io/otel/metric/asyncint64.go
+++ b/vendor/go.opentelemetry.io/otel/metric/asyncint64.go
@@ -212,7 +212,7 @@ type Int64Observer interface {
 }
 
 // Int64Callback is a function registered with a Meter that makes observations
-// for an Int64Observerable instrument it is registered with. Calls to the
+// for an Int64Observable instrument it is registered with. Calls to the
 // Int64Observer record measurement values for the Int64Observable.
 //
 // The function needs to complete in a finite amount of time and the deadline
diff --git a/vendor/go.opentelemetry.io/otel/metric/instrument.go b/vendor/go.opentelemetry.io/otel/metric/instrument.go
index ea52e402..a535782e 100644
--- a/vendor/go.opentelemetry.io/otel/metric/instrument.go
+++ b/vendor/go.opentelemetry.io/otel/metric/instrument.go
@@ -351,7 +351,7 @@ func WithAttributeSet(attributes attribute.Set) MeasurementOption {
 //
 //	cp := make([]attribute.KeyValue, len(attributes))
 //	copy(cp, attributes)
-//	WithAttributes(attribute.NewSet(cp...))
+//	WithAttributeSet(attribute.NewSet(cp...))
 //
 // [attribute.NewSet] may modify the passed attributes so this will make a copy
 // of attributes before creating a set in order to ensure this function is
diff --git a/vendor/go.opentelemetry.io/otel/renovate.json b/vendor/go.opentelemetry.io/otel/renovate.json
index 8c5ac55c..0a29a2f1 100644
--- a/vendor/go.opentelemetry.io/otel/renovate.json
+++ b/vendor/go.opentelemetry.io/otel/renovate.json
@@ -19,6 +19,14 @@
       "matchManagers": ["gomod"],
       "matchDepTypes": ["indirect"],
       "enabled": false
+    },
+    {
+      "matchPackageNames": ["google.golang.org/genproto/googleapis/**"],
+      "groupName": "googleapis"
+    },
+    {
+      "matchPackageNames": ["golang.org/x/**"],
+      "groupName": "golang.org/x"
     }
   ]
 }
diff --git a/vendor/go.opentelemetry.io/otel/sdk/instrumentation/scope.go b/vendor/go.opentelemetry.io/otel/sdk/instrumentation/scope.go
index 72811504..34852a47 100644
--- a/vendor/go.opentelemetry.io/otel/sdk/instrumentation/scope.go
+++ b/vendor/go.opentelemetry.io/otel/sdk/instrumentation/scope.go
@@ -3,6 +3,8 @@
 
 package instrumentation // import "go.opentelemetry.io/otel/sdk/instrumentation"
 
+import "go.opentelemetry.io/otel/attribute"
+
 // Scope represents the instrumentation scope.
 type Scope struct {
 	// Name is the name of the instrumentation scope. This should be the
@@ -12,4 +14,6 @@ type Scope struct {
 	Version string
 	// SchemaURL of the telemetry emitted by the scope.
 	SchemaURL string
+	// Attributes of the telemetry emitted by the scope.
+	Attributes attribute.Set
 }
diff --git a/vendor/go.opentelemetry.io/otel/sdk/metric/config.go b/vendor/go.opentelemetry.io/otel/sdk/metric/config.go
index bbe7bf67..203cd9d6 100644
--- a/vendor/go.opentelemetry.io/otel/sdk/metric/config.go
+++ b/vendor/go.opentelemetry.io/otel/sdk/metric/config.go
@@ -5,17 +5,22 @@ package metric // import "go.opentelemetry.io/otel/sdk/metric"
 
 import (
 	"context"
-	"fmt"
+	"errors"
+	"os"
+	"strings"
 	"sync"
 
+	"go.opentelemetry.io/otel"
+	"go.opentelemetry.io/otel/sdk/metric/exemplar"
 	"go.opentelemetry.io/otel/sdk/resource"
 )
 
 // config contains configuration options for a MeterProvider.
 type config struct {
-	res     *resource.Resource
-	readers []Reader
-	views   []View
+	res            *resource.Resource
+	readers        []Reader
+	views          []View
+	exemplarFilter exemplar.Filter
 }
 
 // readerSignals returns a force-flush and shutdown function for a
@@ -39,25 +44,13 @@ func (c config) readerSignals() (forceFlush, shutdown func(context.Context) erro
 // value.
 func unify(funcs []func(context.Context) error) func(context.Context) error {
 	return func(ctx context.Context) error {
-		var errs []error
+		var err error
 		for _, f := range funcs {
-			if err := f(ctx); err != nil {
-				errs = append(errs, err)
+			if e := f(ctx); e != nil {
+				err = errors.Join(err, e)
 			}
 		}
-		return unifyErrors(errs)
-	}
-}
-
-// unifyErrors combines multiple errors into a single error.
-func unifyErrors(errs []error) error {
-	switch len(errs) {
-	case 0:
-		return nil
-	case 1:
-		return errs[0]
-	default:
-		return fmt.Errorf("%v", errs)
+		return err
 	}
 }
 
@@ -75,7 +68,13 @@ func unifyShutdown(funcs []func(context.Context) error) func(context.Context) er
 
 // newConfig returns a config configured with options.
 func newConfig(options []Option) config {
-	conf := config{res: resource.Default()}
+	conf := config{
+		res:            resource.Default(),
+		exemplarFilter: exemplar.TraceBasedFilter,
+	}
+	for _, o := range meterProviderOptionsFromEnv() {
+		conf = o.apply(conf)
+	}
 	for _, o := range options {
 		conf = o.apply(conf)
 	}
@@ -103,7 +102,11 @@ func (o optionFunc) apply(conf config) config {
 // go.opentelemetry.io/otel/sdk/resource package will be used.
 func WithResource(res *resource.Resource) Option {
 	return optionFunc(func(conf config) config {
-		conf.res = res
+		var err error
+		conf.res, err = resource.Merge(resource.Environment(), res)
+		if err != nil {
+			otel.Handle(err)
+		}
 		return conf
 	})
 }
@@ -135,3 +138,35 @@ func WithView(views ...View) Option {
 		return cfg
 	})
 }
+
+// WithExemplarFilter configures the exemplar filter.
+//
+// The exemplar filter determines which measurements are offered to the
+// exemplar reservoir, but the exemplar reservoir makes the final decision of
+// whether to store an exemplar.
+//
+// By default, the [exemplar.TraceBasedFilter]
+// is used. Exemplars can be entirely disabled by providing the
+// [exemplar.AlwaysOffFilter].
+func WithExemplarFilter(filter exemplar.Filter) Option {
+	return optionFunc(func(cfg config) config {
+		cfg.exemplarFilter = filter
+		return cfg
+	})
+}
+
+func meterProviderOptionsFromEnv() []Option {
+	var opts []Option
+	// https://github.com/open-telemetry/opentelemetry-specification/blob/d4b241f451674e8f611bb589477680341006ad2b/specification/configuration/sdk-environment-variables.md#exemplar
+	const filterEnvKey = "OTEL_METRICS_EXEMPLAR_FILTER"
+
+	switch strings.ToLower(strings.TrimSpace(os.Getenv(filterEnvKey))) {
+	case "always_on":
+		opts = append(opts, WithExemplarFilter(exemplar.AlwaysOnFilter))
+	case "always_off":
+		opts = append(opts, WithExemplarFilter(exemplar.AlwaysOffFilter))
+	case "trace_based":
+		opts = append(opts, WithExemplarFilter(exemplar.TraceBasedFilter))
+	}
+	return opts
+}
diff --git a/vendor/go.opentelemetry.io/otel/sdk/metric/exemplar.go b/vendor/go.opentelemetry.io/otel/sdk/metric/exemplar.go
index 82619da7..0335b8ae 100644
--- a/vendor/go.opentelemetry.io/otel/sdk/metric/exemplar.go
+++ b/vendor/go.opentelemetry.io/otel/sdk/metric/exemplar.go
@@ -4,51 +4,49 @@
 package metric // import "go.opentelemetry.io/otel/sdk/metric"
 
 import (
-	"os"
 	"runtime"
-	"slices"
 
-	"go.opentelemetry.io/otel/sdk/metric/internal/exemplar"
-	"go.opentelemetry.io/otel/sdk/metric/internal/x"
+	"go.opentelemetry.io/otel/attribute"
+	"go.opentelemetry.io/otel/sdk/metric/exemplar"
+	"go.opentelemetry.io/otel/sdk/metric/internal/aggregate"
 )
 
-// reservoirFunc returns the appropriately configured exemplar reservoir
-// creation func based on the passed InstrumentKind and user defined
-// environment variables.
-//
-// Note: This will only return non-nil values when the experimental exemplar
-// feature is enabled and the OTEL_METRICS_EXEMPLAR_FILTER environment variable
-// is not set to always_off.
-func reservoirFunc[N int64 | float64](agg Aggregation) func() exemplar.FilteredReservoir[N] {
-	if !x.Exemplars.Enabled() {
-		return nil
-	}
-	// https://github.com/open-telemetry/opentelemetry-specification/blob/d4b241f451674e8f611bb589477680341006ad2b/specification/configuration/sdk-environment-variables.md#exemplar
-	const filterEnvKey = "OTEL_METRICS_EXEMPLAR_FILTER"
+// ExemplarReservoirProviderSelector selects the
+// [exemplar.ReservoirProvider] to use
+// based on the [Aggregation] of the metric.
+type ExemplarReservoirProviderSelector func(Aggregation) exemplar.ReservoirProvider
 
-	var filter exemplar.Filter
-
-	switch os.Getenv(filterEnvKey) {
-	case "always_on":
-		filter = exemplar.AlwaysOnFilter
-	case "always_off":
-		return exemplar.Drop
-	case "trace_based":
-		fallthrough
-	default:
-		filter = exemplar.SampledFilter
+// reservoirFunc returns the appropriately configured exemplar reservoir
+// creation func based on the passed reservoir provider and exemplar filter.
+func reservoirFunc[N int64 | float64](provider exemplar.ReservoirProvider, filter exemplar.Filter) func(attribute.Set) aggregate.FilteredExemplarReservoir[N] {
+	return func(attrs attribute.Set) aggregate.FilteredExemplarReservoir[N] {
+		return aggregate.NewFilteredExemplarReservoir[N](filter, provider(attrs))
 	}
+}
 
+// DefaultExemplarReservoirProviderSelector returns the default
+// [exemplar.ReservoirProvider] for the
+// provided [Aggregation].
+//
+// For explicit bucket histograms with more than 1 bucket, it uses the
+// [exemplar.HistogramReservoirProvider].
+// For exponential histograms, it uses the
+// [exemplar.FixedSizeReservoirProvider]
+// with a size of min(20, max_buckets).
+// For all other aggregations, it uses the
+// [exemplar.FixedSizeReservoirProvider]
+// with a size equal to the number of CPUs.
+//
+// Exemplar default reservoirs MAY change in a minor version bump. No
+// guarantees are made on the shape or statistical properties of returned
+// exemplars.
+func DefaultExemplarReservoirProviderSelector(agg Aggregation) exemplar.ReservoirProvider {
 	// https://github.com/open-telemetry/opentelemetry-specification/blob/d4b241f451674e8f611bb589477680341006ad2b/specification/metrics/sdk.md#exemplar-defaults
 	// Explicit bucket histogram aggregation with more than 1 bucket will
 	// use AlignedHistogramBucketExemplarReservoir.
 	a, ok := agg.(AggregationExplicitBucketHistogram)
 	if ok && len(a.Boundaries) > 0 {
-		cp := slices.Clone(a.Boundaries)
-		return func() exemplar.FilteredReservoir[N] {
-			bounds := cp
-			return exemplar.NewFilteredReservoir[N](filter, exemplar.Histogram(bounds))
-		}
+		return exemplar.HistogramReservoirProvider(a.Boundaries)
 	}
 
 	var n int
@@ -75,7 +73,5 @@ func reservoirFunc[N int64 | float64](agg Aggregation) func() exemplar.FilteredR
 		}
 	}
 
-	return func() exemplar.FilteredReservoir[N] {
-		return exemplar.NewFilteredReservoir[N](filter, exemplar.FixedSize(n))
-	}
+	return exemplar.FixedSizeReservoirProvider(n)
 }
diff --git a/vendor/go.opentelemetry.io/otel/sdk/metric/exemplar/README.md b/vendor/go.opentelemetry.io/otel/sdk/metric/exemplar/README.md
new file mode 100644
index 00000000..d1025f5e
--- /dev/null
+++ b/vendor/go.opentelemetry.io/otel/sdk/metric/exemplar/README.md
@@ -0,0 +1,3 @@
+# Metric SDK Exemplars
+
+[![PkgGoDev](https://pkg.go.dev/badge/go.opentelemetry.io/otel/sdk/metric/exemplar)](https://pkg.go.dev/go.opentelemetry.io/otel/sdk/metric/exemplar)
diff --git a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/exemplar/doc.go b/vendor/go.opentelemetry.io/otel/sdk/metric/exemplar/doc.go
similarity index 93%
rename from vendor/go.opentelemetry.io/otel/sdk/metric/internal/exemplar/doc.go
rename to vendor/go.opentelemetry.io/otel/sdk/metric/exemplar/doc.go
index 5394f48e..9f238937 100644
--- a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/exemplar/doc.go
+++ b/vendor/go.opentelemetry.io/otel/sdk/metric/exemplar/doc.go
@@ -3,4 +3,4 @@
 
 // Package exemplar provides an implementation of the OpenTelemetry exemplar
 // reservoir to be used in metric collection pipelines.
-package exemplar // import "go.opentelemetry.io/otel/sdk/metric/internal/exemplar"
+package exemplar // import "go.opentelemetry.io/otel/sdk/metric/exemplar"
diff --git a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/exemplar/exemplar.go b/vendor/go.opentelemetry.io/otel/sdk/metric/exemplar/exemplar.go
similarity index 98%
rename from vendor/go.opentelemetry.io/otel/sdk/metric/internal/exemplar/exemplar.go
rename to vendor/go.opentelemetry.io/otel/sdk/metric/exemplar/exemplar.go
index fcaa6a46..1ab69467 100644
--- a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/exemplar/exemplar.go
+++ b/vendor/go.opentelemetry.io/otel/sdk/metric/exemplar/exemplar.go
@@ -1,7 +1,7 @@
 // Copyright The OpenTelemetry Authors
 // SPDX-License-Identifier: Apache-2.0
 
-package exemplar // import "go.opentelemetry.io/otel/sdk/metric/internal/exemplar"
+package exemplar // import "go.opentelemetry.io/otel/sdk/metric/exemplar"
 
 import (
 	"time"
diff --git a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/exemplar/filter.go b/vendor/go.opentelemetry.io/otel/sdk/metric/exemplar/filter.go
similarity index 75%
rename from vendor/go.opentelemetry.io/otel/sdk/metric/internal/exemplar/filter.go
rename to vendor/go.opentelemetry.io/otel/sdk/metric/exemplar/filter.go
index 152a069a..b595e2ac 100644
--- a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/exemplar/filter.go
+++ b/vendor/go.opentelemetry.io/otel/sdk/metric/exemplar/filter.go
@@ -1,7 +1,7 @@
 // Copyright The OpenTelemetry Authors
 // SPDX-License-Identifier: Apache-2.0
 
-package exemplar // import "go.opentelemetry.io/otel/sdk/metric/internal/exemplar"
+package exemplar // import "go.opentelemetry.io/otel/sdk/metric/exemplar"
 
 import (
 	"context"
@@ -16,10 +16,10 @@ import (
 // Reservoir in making a sampling decision.
 type Filter func(context.Context) bool
 
-// SampledFilter is a [Filter] that will only offer measurements
+// TraceBasedFilter is a [Filter] that will only offer measurements
 // if the passed context associated with the measurement contains a sampled
 // [go.opentelemetry.io/otel/trace.SpanContext].
-func SampledFilter(ctx context.Context) bool {
+func TraceBasedFilter(ctx context.Context) bool {
 	return trace.SpanContextFromContext(ctx).IsSampled()
 }
 
@@ -27,3 +27,8 @@ func SampledFilter(ctx context.Context) bool {
 func AlwaysOnFilter(ctx context.Context) bool {
 	return true
 }
+
+// AlwaysOffFilter is a [Filter] that never offers measurements.
+func AlwaysOffFilter(ctx context.Context) bool {
+	return false
+}
diff --git a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/exemplar/rand.go b/vendor/go.opentelemetry.io/otel/sdk/metric/exemplar/fixed_size_reservoir.go
similarity index 73%
rename from vendor/go.opentelemetry.io/otel/sdk/metric/internal/exemplar/rand.go
rename to vendor/go.opentelemetry.io/otel/sdk/metric/exemplar/fixed_size_reservoir.go
index 199a2608..d4aab0aa 100644
--- a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/exemplar/rand.go
+++ b/vendor/go.opentelemetry.io/otel/sdk/metric/exemplar/fixed_size_reservoir.go
@@ -1,31 +1,69 @@
 // Copyright The OpenTelemetry Authors
 // SPDX-License-Identifier: Apache-2.0
 
-package exemplar // import "go.opentelemetry.io/otel/sdk/metric/internal/exemplar"
+package exemplar // import "go.opentelemetry.io/otel/sdk/metric/exemplar"
 
 import (
 	"context"
 	"math"
 	"math/rand"
-	"sync"
 	"time"
 
 	"go.opentelemetry.io/otel/attribute"
 )
 
-var (
+// FixedSizeReservoirProvider returns a provider of [FixedSizeReservoir].
+func FixedSizeReservoirProvider(k int) ReservoirProvider {
+	return func(_ attribute.Set) Reservoir {
+		return NewFixedSizeReservoir(k)
+	}
+}
+
+// NewFixedSizeReservoir returns a [FixedSizeReservoir] that samples at most
+// k exemplars. If there are k or fewer measurements made, the Reservoir will
+// sample each one. If there are more than k, the Reservoir will then randomly
+// sample all additional measurements with a decreasing probability.
+func NewFixedSizeReservoir(k int) *FixedSizeReservoir {
+	return newFixedSizeReservoir(newStorage(k))
+}
+
+var _ Reservoir = &FixedSizeReservoir{}
+
+// FixedSizeReservoir is a [Reservoir] that samples at most k exemplars. If
+// there are k or fewer measurements made, the Reservoir will sample each one.
+// If there are more than k, the Reservoir will then randomly sample all
+// additional measurements with a decreasing probability.
+type FixedSizeReservoir struct {
+	*storage
+
+	// count is the number of measurement seen.
+	count int64
+	// next is the next count that will store a measurement at a random index
+	// once the reservoir has been filled.
+	next int64
+	// w is the largest random number in a distribution that is used to compute
+	// the next next.
+	w float64
+
 	// rng is used to make sampling decisions.
 	//
 	// Do not use crypto/rand. There is no reason for the decrease in performance
 	// given this is not a security sensitive decision.
-	rng = rand.New(rand.NewSource(time.Now().UnixNano()))
-	// Ensure concurrent safe accecess to rng and its underlying source.
-	rngMu sync.Mutex
-)
+	rng *rand.Rand
+}
 
-// random returns, as a float64, a uniform pseudo-random number in the open
-// interval (0.0,1.0).
-func random() float64 {
+func newFixedSizeReservoir(s *storage) *FixedSizeReservoir {
+	r := &FixedSizeReservoir{
+		storage: s,
+		rng:     rand.New(rand.NewSource(time.Now().UnixNano())),
+	}
+	r.reset()
+	return r
+}
+
+// randomFloat64 returns, as a float64, a uniform pseudo-random number in the
+// open interval (0.0,1.0).
+func (r *FixedSizeReservoir) randomFloat64() float64 {
 	// TODO: This does not return a uniform number. rng.Float64 returns a
 	// uniformly random int in [0,2^53) that is divided by 2^53. Meaning it
 	// returns multiples of 2^-53, and not all floating point numbers between 0
@@ -43,40 +81,25 @@ func random() float64 {
 	//
 	// There are likely many other methods to explore here as well.
 
-	rngMu.Lock()
-	defer rngMu.Unlock()
-
-	f := rng.Float64()
+	f := r.rng.Float64()
 	for f == 0 {
-		f = rng.Float64()
+		f = r.rng.Float64()
 	}
 	return f
 }
 
-// FixedSize returns a [Reservoir] that samples at most k exemplars. If there
-// are k or less measurements made, the Reservoir will sample each one. If
-// there are more than k, the Reservoir will then randomly sample all
-// additional measurement with a decreasing probability.
-func FixedSize(k int) Reservoir {
-	r := &randRes{storage: newStorage(k)}
-	r.reset()
-	return r
-}
-
-type randRes struct {
-	*storage
-
-	// count is the number of measurement seen.
-	count int64
-	// next is the next count that will store a measurement at a random index
-	// once the reservoir has been filled.
-	next int64
-	// w is the largest random number in a distribution that is used to compute
-	// the next next.
-	w float64
-}
-
-func (r *randRes) Offer(ctx context.Context, t time.Time, n Value, a []attribute.KeyValue) {
+// Offer accepts the parameters associated with a measurement. The
+// parameters will be stored as an exemplar if the Reservoir decides to
+// sample the measurement.
+//
+// The passed ctx needs to contain any baggage or span that were active
+// when the measurement was made. This information may be used by the
+// Reservoir in making a sampling decision.
+//
+// The time t is the time when the measurement was made. The n and a
+// parameters are the value and dropped (filtered) attributes of the
+// measurement respectively.
+func (r *FixedSizeReservoir) Offer(ctx context.Context, t time.Time, n Value, a []attribute.KeyValue) {
 	// The following algorithm is "Algorithm L" from Li, Kim-Hung (4 December
 	// 1994). "Reservoir-Sampling Algorithms of Time Complexity
 	// O(n(1+log(N/n)))". ACM Transactions on Mathematical Software. 20 (4):
@@ -123,7 +146,7 @@ func (r *randRes) Offer(ctx context.Context, t time.Time, n Value, a []attribute
 	} else {
 		if r.count == r.next {
 			// Overwrite a random existing measurement with the one offered.
-			idx := int(rng.Int63n(int64(cap(r.store))))
+			idx := int(r.rng.Int63n(int64(cap(r.store))))
 			r.store[idx] = newMeasurement(ctx, t, n, a)
 			r.advance()
 		}
@@ -132,7 +155,7 @@ func (r *randRes) Offer(ctx context.Context, t time.Time, n Value, a []attribute
 }
 
 // reset resets r to the initial state.
-func (r *randRes) reset() {
+func (r *FixedSizeReservoir) reset() {
 	// This resets the number of exemplars known.
 	r.count = 0
 	// Random index inserts should only happen after the storage is full.
@@ -147,14 +170,14 @@ func (r *randRes) reset() {
 	// This maps the uniform random number in (0,1) to a geometric distribution
 	// over the same interval. The mean of the distribution is inversely
 	// proportional to the storage capacity.
-	r.w = math.Exp(math.Log(random()) / float64(cap(r.store)))
+	r.w = math.Exp(math.Log(r.randomFloat64()) / float64(cap(r.store)))
 
 	r.advance()
 }
 
 // advance updates the count at which the offered measurement will overwrite an
 // existing exemplar.
-func (r *randRes) advance() {
+func (r *FixedSizeReservoir) advance() {
 	// Calculate the next value in the random number series.
 	//
 	// The current value of r.w is based on the max of a distribution of random
@@ -167,7 +190,7 @@ func (r *randRes) advance() {
 	// therefore the next r.w will be based on the same distribution (i.e.
 	// `max(u_1,u_2,...,u_k)`). Therefore, we can sample the next r.w by
 	// computing the next random number `u` and take r.w as `w * u^(1/k)`.
-	r.w *= math.Exp(math.Log(random()) / float64(cap(r.store)))
+	r.w *= math.Exp(math.Log(r.randomFloat64()) / float64(cap(r.store)))
 	// Use the new random number in the series to calculate the count of the
 	// next measurement that will be stored.
 	//
@@ -178,10 +201,13 @@ func (r *randRes) advance() {
 	//
 	// Important to note, the new r.next will always be at least 1 more than
 	// the last r.next.
-	r.next += int64(math.Log(random())/math.Log(1-r.w)) + 1
+	r.next += int64(math.Log(r.randomFloat64())/math.Log(1-r.w)) + 1
 }
 
-func (r *randRes) Collect(dest *[]Exemplar) {
+// Collect returns all the held exemplars.
+//
+// The Reservoir state is preserved after this call.
+func (r *FixedSizeReservoir) Collect(dest *[]Exemplar) {
 	r.storage.Collect(dest)
 	// Call reset here even though it will reset r.count and restart the random
 	// number series. This will persist any old exemplars as long as no new
diff --git a/vendor/go.opentelemetry.io/otel/sdk/metric/exemplar/histogram_reservoir.go b/vendor/go.opentelemetry.io/otel/sdk/metric/exemplar/histogram_reservoir.go
new file mode 100644
index 00000000..3b76cf30
--- /dev/null
+++ b/vendor/go.opentelemetry.io/otel/sdk/metric/exemplar/histogram_reservoir.go
@@ -0,0 +1,70 @@
+// Copyright The OpenTelemetry Authors
+// SPDX-License-Identifier: Apache-2.0
+
+package exemplar // import "go.opentelemetry.io/otel/sdk/metric/exemplar"
+
+import (
+	"context"
+	"slices"
+	"sort"
+	"time"
+
+	"go.opentelemetry.io/otel/attribute"
+)
+
+// HistogramReservoirProvider is a provider of [HistogramReservoir].
+func HistogramReservoirProvider(bounds []float64) ReservoirProvider {
+	cp := slices.Clone(bounds)
+	slices.Sort(cp)
+	return func(_ attribute.Set) Reservoir {
+		return NewHistogramReservoir(cp)
+	}
+}
+
+// NewHistogramReservoir returns a [HistogramReservoir] that samples the last
+// measurement that falls within a histogram bucket. The histogram bucket
+// upper-boundaries are defined by bounds.
+//
+// The passed bounds must be sorted before calling this function.
+func NewHistogramReservoir(bounds []float64) *HistogramReservoir {
+	return &HistogramReservoir{
+		bounds:  bounds,
+		storage: newStorage(len(bounds) + 1),
+	}
+}
+
+var _ Reservoir = &HistogramReservoir{}
+
+// HistogramReservoir is a [Reservoir] that samples the last measurement that
+// falls within a histogram bucket. The histogram bucket upper-boundaries are
+// defined by bounds.
+type HistogramReservoir struct {
+	*storage
+
+	// bounds are bucket bounds in ascending order.
+	bounds []float64
+}
+
+// Offer accepts the parameters associated with a measurement. The
+// parameters will be stored as an exemplar if the Reservoir decides to
+// sample the measurement.
+//
+// The passed ctx needs to contain any baggage or span that were active
+// when the measurement was made. This information may be used by the
+// Reservoir in making a sampling decision.
+//
+// The time t is the time when the measurement was made. The v and a
+// parameters are the value and dropped (filtered) attributes of the
+// measurement respectively.
+func (r *HistogramReservoir) Offer(ctx context.Context, t time.Time, v Value, a []attribute.KeyValue) {
+	var x float64
+	switch v.Type() {
+	case Int64ValueType:
+		x = float64(v.Int64())
+	case Float64ValueType:
+		x = v.Float64()
+	default:
+		panic("unknown value type")
+	}
+	r.store[sort.SearchFloat64s(r.bounds, x)] = newMeasurement(ctx, t, v, a)
+}
diff --git a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/exemplar/reservoir.go b/vendor/go.opentelemetry.io/otel/sdk/metric/exemplar/reservoir.go
similarity index 73%
rename from vendor/go.opentelemetry.io/otel/sdk/metric/internal/exemplar/reservoir.go
rename to vendor/go.opentelemetry.io/otel/sdk/metric/exemplar/reservoir.go
index 80fa5955..ba5cd1a6 100644
--- a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/exemplar/reservoir.go
+++ b/vendor/go.opentelemetry.io/otel/sdk/metric/exemplar/reservoir.go
@@ -1,7 +1,7 @@
 // Copyright The OpenTelemetry Authors
 // SPDX-License-Identifier: Apache-2.0
 
-package exemplar // import "go.opentelemetry.io/otel/sdk/metric/internal/exemplar"
+package exemplar // import "go.opentelemetry.io/otel/sdk/metric/exemplar"
 
 import (
 	"context"
@@ -30,3 +30,11 @@ type Reservoir interface {
 	// The Reservoir state is preserved after this call.
 	Collect(dest *[]Exemplar)
 }
+
+// ReservoirProvider creates new [Reservoir]s.
+//
+// The attributes provided are attributes which are kept by the aggregation, and
+// are exclusive with attributes passed to Offer. The combination of these
+// attributes and the attributes passed to Offer is the complete set of
+// attributes a measurement was made with.
+type ReservoirProvider func(attr attribute.Set) Reservoir
diff --git a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/exemplar/storage.go b/vendor/go.opentelemetry.io/otel/sdk/metric/exemplar/storage.go
similarity index 94%
rename from vendor/go.opentelemetry.io/otel/sdk/metric/internal/exemplar/storage.go
rename to vendor/go.opentelemetry.io/otel/sdk/metric/exemplar/storage.go
index 10b2976f..0e2e26df 100644
--- a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/exemplar/storage.go
+++ b/vendor/go.opentelemetry.io/otel/sdk/metric/exemplar/storage.go
@@ -1,7 +1,7 @@
 // Copyright The OpenTelemetry Authors
 // SPDX-License-Identifier: Apache-2.0
 
-package exemplar // import "go.opentelemetry.io/otel/sdk/metric/internal/exemplar"
+package exemplar // import "go.opentelemetry.io/otel/sdk/metric/exemplar"
 
 import (
 	"context"
@@ -35,7 +35,7 @@ func (r *storage) Collect(dest *[]Exemplar) {
 			continue
 		}
 
-		m.Exemplar(&(*dest)[n])
+		m.exemplar(&(*dest)[n])
 		n++
 	}
 	*dest = (*dest)[:n]
@@ -66,8 +66,8 @@ func newMeasurement(ctx context.Context, ts time.Time, v Value, droppedAttr []at
 	}
 }
 
-// Exemplar returns m as an [Exemplar].
-func (m measurement) Exemplar(dest *Exemplar) {
+// exemplar returns m as an [Exemplar].
+func (m measurement) exemplar(dest *Exemplar) {
 	dest.FilteredAttributes = m.FilteredAttributes
 	dest.Time = m.Time
 	dest.Value = m.Value
diff --git a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/exemplar/value.go b/vendor/go.opentelemetry.io/otel/sdk/metric/exemplar/value.go
similarity index 91%
rename from vendor/go.opentelemetry.io/otel/sdk/metric/internal/exemplar/value.go
rename to vendor/go.opentelemetry.io/otel/sdk/metric/exemplar/value.go
index 1957d6b1..590b089a 100644
--- a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/exemplar/value.go
+++ b/vendor/go.opentelemetry.io/otel/sdk/metric/exemplar/value.go
@@ -1,7 +1,7 @@
 // Copyright The OpenTelemetry Authors
 // SPDX-License-Identifier: Apache-2.0
 
-package exemplar // import "go.opentelemetry.io/otel/sdk/metric/internal/exemplar"
+package exemplar // import "go.opentelemetry.io/otel/sdk/metric/exemplar"
 
 import "math"
 
@@ -28,7 +28,8 @@ type Value struct {
 func NewValue[N int64 | float64](value N) Value {
 	switch v := any(value).(type) {
 	case int64:
-		return Value{t: Int64ValueType, val: uint64(v)}
+		// This can be later converted back to int64 (overflow not checked).
+		return Value{t: Int64ValueType, val: uint64(v)} // nolint:gosec
 	case float64:
 		return Value{t: Float64ValueType, val: math.Float64bits(v)}
 	}
diff --git a/vendor/go.opentelemetry.io/otel/sdk/metric/instrument.go b/vendor/go.opentelemetry.io/otel/sdk/metric/instrument.go
index b52a330b..48b723a7 100644
--- a/vendor/go.opentelemetry.io/otel/sdk/metric/instrument.go
+++ b/vendor/go.opentelemetry.io/otel/sdk/metric/instrument.go
@@ -144,6 +144,12 @@ type Stream struct {
 	// Use NewAllowKeysFilter from "go.opentelemetry.io/otel/attribute" to
 	// provide an allow-list of attribute keys here.
 	AttributeFilter attribute.Filter
+	// ExemplarReservoirProviderSelector selects the
+	// [go.opentelemetry.io/otel/sdk/metric/exemplar.ReservoirProvider] based
+	// on the [Aggregation].
+	//
+	// If unspecified, [DefaultExemplarReservoirProviderSelector] is used.
+	ExemplarReservoirProviderSelector ExemplarReservoirProviderSelector
 }
 
 // instID are the identifying properties of a instrument.
@@ -234,8 +240,8 @@ func (i *float64Inst) aggregate(ctx context.Context, val float64, s attribute.Se
 	}
 }
 
-// observablID is a comparable unique identifier of an observable.
-type observablID[N int64 | float64] struct {
+// observableID is a comparable unique identifier of an observable.
+type observableID[N int64 | float64] struct {
 	name        string
 	description string
 	kind        InstrumentKind
@@ -287,7 +293,7 @@ func newInt64Observable(m *meter, kind InstrumentKind, name, desc, u string) int
 
 type observable[N int64 | float64] struct {
 	metric.Observable
-	observablID[N]
+	observableID[N]
 
 	meter           *meter
 	measures        measures[N]
@@ -296,7 +302,7 @@ type observable[N int64 | float64] struct {
 
 func newObservable[N int64 | float64](m *meter, kind InstrumentKind, name, desc, u string) *observable[N] {
 	return &observable[N]{
-		observablID: observablID[N]{
+		observableID: observableID[N]{
 			name:        name,
 			description: desc,
 			kind:        kind,
diff --git a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/aggregate/aggregate.go b/vendor/go.opentelemetry.io/otel/sdk/metric/internal/aggregate/aggregate.go
index b18ee719..fde21933 100644
--- a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/aggregate/aggregate.go
+++ b/vendor/go.opentelemetry.io/otel/sdk/metric/internal/aggregate/aggregate.go
@@ -8,7 +8,6 @@ import (
 	"time"
 
 	"go.opentelemetry.io/otel/attribute"
-	"go.opentelemetry.io/otel/sdk/metric/internal/exemplar"
 	"go.opentelemetry.io/otel/sdk/metric/metricdata"
 )
 
@@ -38,8 +37,8 @@ type Builder[N int64 | float64] struct {
 	// create new exemplar reservoirs for a new seen attribute set.
 	//
 	// If this is not provided a default factory function that returns an
-	// exemplar.Drop reservoir will be used.
-	ReservoirFunc func() exemplar.FilteredReservoir[N]
+	// exemplar-dropping reservoir (see dropReservoir) will be used.
+	ReservoirFunc func(attribute.Set) FilteredExemplarReservoir[N]
 	// AggregationLimit is the cardinality limit of measurement attributes. Any
 	// measurement for new attributes once the limit has been reached will be
 	// aggregated into a single aggregate for the "otel.metric.overflow"
@@ -50,12 +49,12 @@ type Builder[N int64 | float64] struct {
 	AggregationLimit int
 }
 
-func (b Builder[N]) resFunc() func() exemplar.FilteredReservoir[N] {
+func (b Builder[N]) resFunc() func(attribute.Set) FilteredExemplarReservoir[N] {
 	if b.ReservoirFunc != nil {
 		return b.ReservoirFunc
 	}
 
-	return exemplar.Drop
+	return dropReservoir
 }
 
 type fltrMeasure[N int64 | float64] func(ctx context.Context, value N, fltrAttr attribute.Set, droppedAttr []attribute.KeyValue)
diff --git a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/aggregate/drop.go b/vendor/go.opentelemetry.io/otel/sdk/metric/internal/aggregate/drop.go
new file mode 100644
index 00000000..8396faaa
--- /dev/null
+++ b/vendor/go.opentelemetry.io/otel/sdk/metric/internal/aggregate/drop.go
@@ -0,0 +1,27 @@
+// Copyright The OpenTelemetry Authors
+// SPDX-License-Identifier: Apache-2.0
+
+package aggregate // import "go.opentelemetry.io/otel/sdk/metric/internal/aggregate"
+
+import (
+	"context"
+
+	"go.opentelemetry.io/otel/attribute"
+	"go.opentelemetry.io/otel/sdk/metric/exemplar"
+)
+
+// dropReservoir returns a [FilteredExemplarReservoir] that drops all measurements it is offered.
+func dropReservoir[N int64 | float64](attribute.Set) FilteredExemplarReservoir[N] {
+	return &dropRes[N]{}
+}
+
+type dropRes[N int64 | float64] struct{}
+
+// Offer does nothing, all measurements offered will be dropped.
+func (r *dropRes[N]) Offer(context.Context, N, []attribute.KeyValue) {}
+
+// Collect resets dest. No exemplars will ever be returned.
+func (r *dropRes[N]) Collect(dest *[]exemplar.Exemplar) {
+	clear(*dest) // Erase elements to let GC collect objects
+	*dest = (*dest)[:0]
+}
diff --git a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/aggregate/exemplar.go b/vendor/go.opentelemetry.io/otel/sdk/metric/internal/aggregate/exemplar.go
index 170ae8e5..25d70994 100644
--- a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/aggregate/exemplar.go
+++ b/vendor/go.opentelemetry.io/otel/sdk/metric/internal/aggregate/exemplar.go
@@ -6,7 +6,7 @@ package aggregate // import "go.opentelemetry.io/otel/sdk/metric/internal/aggreg
 import (
 	"sync"
 
-	"go.opentelemetry.io/otel/sdk/metric/internal/exemplar"
+	"go.opentelemetry.io/otel/sdk/metric/exemplar"
 	"go.opentelemetry.io/otel/sdk/metric/metricdata"
 )
 
@@ -17,6 +17,7 @@ var exemplarPool = sync.Pool{
 func collectExemplars[N int64 | float64](out *[]metricdata.Exemplar[N], f func(*[]exemplar.Exemplar)) {
 	dest := exemplarPool.Get().(*[]exemplar.Exemplar)
 	defer func() {
+		clear(*dest) // Erase elements to let GC collect objects.
 		*dest = (*dest)[:0]
 		exemplarPool.Put(dest)
 	}()
diff --git a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/aggregate/exponential_histogram.go b/vendor/go.opentelemetry.io/otel/sdk/metric/internal/aggregate/exponential_histogram.go
index 70734240..b7aa7216 100644
--- a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/aggregate/exponential_histogram.go
+++ b/vendor/go.opentelemetry.io/otel/sdk/metric/internal/aggregate/exponential_histogram.go
@@ -12,7 +12,6 @@ import (
 
 	"go.opentelemetry.io/otel"
 	"go.opentelemetry.io/otel/attribute"
-	"go.opentelemetry.io/otel/sdk/metric/internal/exemplar"
 	"go.opentelemetry.io/otel/sdk/metric/metricdata"
 )
 
@@ -31,7 +30,7 @@ const (
 // expoHistogramDataPoint is a single data point in an exponential histogram.
 type expoHistogramDataPoint[N int64 | float64] struct {
 	attrs attribute.Set
-	res   exemplar.FilteredReservoir[N]
+	res   FilteredExemplarReservoir[N]
 
 	count uint64
 	min   N
@@ -284,7 +283,7 @@ func (b *expoBuckets) downscale(delta int32) {
 // newExponentialHistogram returns an Aggregator that summarizes a set of
 // measurements as an exponential histogram. Each histogram is scoped by attributes
 // and the aggregation cycle the measurements were made in.
-func newExponentialHistogram[N int64 | float64](maxSize, maxScale int32, noMinMax, noSum bool, limit int, r func() exemplar.FilteredReservoir[N]) *expoHistogram[N] {
+func newExponentialHistogram[N int64 | float64](maxSize, maxScale int32, noMinMax, noSum bool, limit int, r func(attribute.Set) FilteredExemplarReservoir[N]) *expoHistogram[N] {
 	return &expoHistogram[N]{
 		noSum:    noSum,
 		noMinMax: noMinMax,
@@ -307,7 +306,7 @@ type expoHistogram[N int64 | float64] struct {
 	maxSize  int
 	maxScale int32
 
-	newRes   func() exemplar.FilteredReservoir[N]
+	newRes   func(attribute.Set) FilteredExemplarReservoir[N]
 	limit    limiter[*expoHistogramDataPoint[N]]
 	values   map[attribute.Distinct]*expoHistogramDataPoint[N]
 	valuesMu sync.Mutex
@@ -328,7 +327,7 @@ func (e *expoHistogram[N]) measure(ctx context.Context, value N, fltrAttr attrib
 	v, ok := e.values[attr.Equivalent()]
 	if !ok {
 		v = newExpoHistogramDataPoint[N](attr, e.maxSize, e.maxScale, e.noMinMax, e.noSum)
-		v.res = e.newRes()
+		v.res = e.newRes(attr)
 
 		e.values[attr.Equivalent()] = v
 	}
diff --git a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/aggregate/filtered_reservoir.go b/vendor/go.opentelemetry.io/otel/sdk/metric/internal/aggregate/filtered_reservoir.go
new file mode 100644
index 00000000..691a9106
--- /dev/null
+++ b/vendor/go.opentelemetry.io/otel/sdk/metric/internal/aggregate/filtered_reservoir.go
@@ -0,0 +1,50 @@
+// Copyright The OpenTelemetry Authors
+// SPDX-License-Identifier: Apache-2.0
+
+package aggregate // import "go.opentelemetry.io/otel/sdk/metric/internal/aggregate"
+
+import (
+	"context"
+	"time"
+
+	"go.opentelemetry.io/otel/attribute"
+	"go.opentelemetry.io/otel/sdk/metric/exemplar"
+)
+
+// FilteredExemplarReservoir wraps an [exemplar.Reservoir] with a filter.
+type FilteredExemplarReservoir[N int64 | float64] interface {
+	// Offer accepts the parameters associated with a measurement. The
+	// parameters will be stored as an exemplar if the filter decides to
+	// sample the measurement.
+	//
+	// The passed ctx needs to contain any baggage or span that were active
+	// when the measurement was made. This information may be used by the
+	// Reservoir in making a sampling decision.
+	Offer(ctx context.Context, val N, attr []attribute.KeyValue)
+	// Collect returns all the held exemplars in the reservoir.
+	Collect(dest *[]exemplar.Exemplar)
+}
+
+// filteredExemplarReservoir handles the pre-sampled exemplar of measurements made.
+type filteredExemplarReservoir[N int64 | float64] struct {
+	filter    exemplar.Filter
+	reservoir exemplar.Reservoir
+}
+
+// NewFilteredExemplarReservoir creates a [FilteredExemplarReservoir] which only offers values
+// that are allowed by the filter.
+func NewFilteredExemplarReservoir[N int64 | float64](f exemplar.Filter, r exemplar.Reservoir) FilteredExemplarReservoir[N] {
+	return &filteredExemplarReservoir[N]{
+		filter:    f,
+		reservoir: r,
+	}
+}
+
+func (f *filteredExemplarReservoir[N]) Offer(ctx context.Context, val N, attr []attribute.KeyValue) {
+	if f.filter(ctx) {
+		// only record the current time if we are sampling this measurement.
+		f.reservoir.Offer(ctx, time.Now(), exemplar.NewValue(val), attr)
+	}
+}
+
+func (f *filteredExemplarReservoir[N]) Collect(dest *[]exemplar.Exemplar) { f.reservoir.Collect(dest) }
diff --git a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/aggregate/histogram.go b/vendor/go.opentelemetry.io/otel/sdk/metric/internal/aggregate/histogram.go
index ade0941f..d577ae2c 100644
--- a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/aggregate/histogram.go
+++ b/vendor/go.opentelemetry.io/otel/sdk/metric/internal/aggregate/histogram.go
@@ -11,13 +11,12 @@ import (
 	"time"
 
 	"go.opentelemetry.io/otel/attribute"
-	"go.opentelemetry.io/otel/sdk/metric/internal/exemplar"
 	"go.opentelemetry.io/otel/sdk/metric/metricdata"
 )
 
 type buckets[N int64 | float64] struct {
 	attrs attribute.Set
-	res   exemplar.FilteredReservoir[N]
+	res   FilteredExemplarReservoir[N]
 
 	counts   []uint64
 	count    uint64
@@ -48,13 +47,13 @@ type histValues[N int64 | float64] struct {
 	noSum  bool
 	bounds []float64
 
-	newRes   func() exemplar.FilteredReservoir[N]
+	newRes   func(attribute.Set) FilteredExemplarReservoir[N]
 	limit    limiter[*buckets[N]]
 	values   map[attribute.Distinct]*buckets[N]
 	valuesMu sync.Mutex
 }
 
-func newHistValues[N int64 | float64](bounds []float64, noSum bool, limit int, r func() exemplar.FilteredReservoir[N]) *histValues[N] {
+func newHistValues[N int64 | float64](bounds []float64, noSum bool, limit int, r func(attribute.Set) FilteredExemplarReservoir[N]) *histValues[N] {
 	// The responsibility of keeping all buckets correctly associated with the
 	// passed boundaries is ultimately this type's responsibility. Make a copy
 	// here so we can always guarantee this. Or, in the case of failure, have
@@ -94,7 +93,7 @@ func (s *histValues[N]) measure(ctx context.Context, value N, fltrAttr attribute
 		//
 		//   buckets = (-∞, 0], (0, 5.0], (5.0, 10.0], (10.0, +∞)
 		b = newBuckets[N](attr, len(s.bounds)+1)
-		b.res = s.newRes()
+		b.res = s.newRes(attr)
 
 		// Ensure min and max are recorded values (not zero), for new buckets.
 		b.min, b.max = value, value
@@ -109,7 +108,7 @@ func (s *histValues[N]) measure(ctx context.Context, value N, fltrAttr attribute
 
 // newHistogram returns an Aggregator that summarizes a set of measurements as
 // an histogram.
-func newHistogram[N int64 | float64](boundaries []float64, noMinMax, noSum bool, limit int, r func() exemplar.FilteredReservoir[N]) *histogram[N] {
+func newHistogram[N int64 | float64](boundaries []float64, noMinMax, noSum bool, limit int, r func(attribute.Set) FilteredExemplarReservoir[N]) *histogram[N] {
 	return &histogram[N]{
 		histValues: newHistValues[N](boundaries, noSum, limit, r),
 		noMinMax:   noMinMax,
diff --git a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/aggregate/lastvalue.go b/vendor/go.opentelemetry.io/otel/sdk/metric/internal/aggregate/lastvalue.go
index c3593684..d3a93f08 100644
--- a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/aggregate/lastvalue.go
+++ b/vendor/go.opentelemetry.io/otel/sdk/metric/internal/aggregate/lastvalue.go
@@ -9,7 +9,6 @@ import (
 	"time"
 
 	"go.opentelemetry.io/otel/attribute"
-	"go.opentelemetry.io/otel/sdk/metric/internal/exemplar"
 	"go.opentelemetry.io/otel/sdk/metric/metricdata"
 )
 
@@ -17,10 +16,10 @@ import (
 type datapoint[N int64 | float64] struct {
 	attrs attribute.Set
 	value N
-	res   exemplar.FilteredReservoir[N]
+	res   FilteredExemplarReservoir[N]
 }
 
-func newLastValue[N int64 | float64](limit int, r func() exemplar.FilteredReservoir[N]) *lastValue[N] {
+func newLastValue[N int64 | float64](limit int, r func(attribute.Set) FilteredExemplarReservoir[N]) *lastValue[N] {
 	return &lastValue[N]{
 		newRes: r,
 		limit:  newLimiter[datapoint[N]](limit),
@@ -33,7 +32,7 @@ func newLastValue[N int64 | float64](limit int, r func() exemplar.FilteredReserv
 type lastValue[N int64 | float64] struct {
 	sync.Mutex
 
-	newRes func() exemplar.FilteredReservoir[N]
+	newRes func(attribute.Set) FilteredExemplarReservoir[N]
 	limit  limiter[datapoint[N]]
 	values map[attribute.Distinct]datapoint[N]
 	start  time.Time
@@ -46,7 +45,7 @@ func (s *lastValue[N]) measure(ctx context.Context, value N, fltrAttr attribute.
 	attr := s.limit.Attributes(fltrAttr, s.values)
 	d, ok := s.values[attr.Equivalent()]
 	if !ok {
-		d.res = s.newRes()
+		d.res = s.newRes(attr)
 	}
 
 	d.attrs = attr
@@ -115,7 +114,7 @@ func (s *lastValue[N]) copyDpts(dest *[]metricdata.DataPoint[N], t time.Time) in
 
 // newPrecomputedLastValue returns an aggregator that summarizes a set of
 // observations as the last one made.
-func newPrecomputedLastValue[N int64 | float64](limit int, r func() exemplar.FilteredReservoir[N]) *precomputedLastValue[N] {
+func newPrecomputedLastValue[N int64 | float64](limit int, r func(attribute.Set) FilteredExemplarReservoir[N]) *precomputedLastValue[N] {
 	return &precomputedLastValue[N]{lastValue: newLastValue[N](limit, r)}
 }
 
diff --git a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/aggregate/sum.go b/vendor/go.opentelemetry.io/otel/sdk/metric/internal/aggregate/sum.go
index 89136692..8e132ad6 100644
--- a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/aggregate/sum.go
+++ b/vendor/go.opentelemetry.io/otel/sdk/metric/internal/aggregate/sum.go
@@ -9,25 +9,24 @@ import (
 	"time"
 
 	"go.opentelemetry.io/otel/attribute"
-	"go.opentelemetry.io/otel/sdk/metric/internal/exemplar"
 	"go.opentelemetry.io/otel/sdk/metric/metricdata"
 )
 
 type sumValue[N int64 | float64] struct {
 	n     N
-	res   exemplar.FilteredReservoir[N]
+	res   FilteredExemplarReservoir[N]
 	attrs attribute.Set
 }
 
 // valueMap is the storage for sums.
 type valueMap[N int64 | float64] struct {
 	sync.Mutex
-	newRes func() exemplar.FilteredReservoir[N]
+	newRes func(attribute.Set) FilteredExemplarReservoir[N]
 	limit  limiter[sumValue[N]]
 	values map[attribute.Distinct]sumValue[N]
 }
 
-func newValueMap[N int64 | float64](limit int, r func() exemplar.FilteredReservoir[N]) *valueMap[N] {
+func newValueMap[N int64 | float64](limit int, r func(attribute.Set) FilteredExemplarReservoir[N]) *valueMap[N] {
 	return &valueMap[N]{
 		newRes: r,
 		limit:  newLimiter[sumValue[N]](limit),
@@ -42,7 +41,7 @@ func (s *valueMap[N]) measure(ctx context.Context, value N, fltrAttr attribute.S
 	attr := s.limit.Attributes(fltrAttr, s.values)
 	v, ok := s.values[attr.Equivalent()]
 	if !ok {
-		v.res = s.newRes()
+		v.res = s.newRes(attr)
 	}
 
 	v.attrs = attr
@@ -55,7 +54,7 @@ func (s *valueMap[N]) measure(ctx context.Context, value N, fltrAttr attribute.S
 // newSum returns an aggregator that summarizes a set of measurements as their
 // arithmetic sum. Each sum is scoped by attributes and the aggregation cycle
 // the measurements were made in.
-func newSum[N int64 | float64](monotonic bool, limit int, r func() exemplar.FilteredReservoir[N]) *sum[N] {
+func newSum[N int64 | float64](monotonic bool, limit int, r func(attribute.Set) FilteredExemplarReservoir[N]) *sum[N] {
 	return &sum[N]{
 		valueMap:  newValueMap[N](limit, r),
 		monotonic: monotonic,
@@ -142,9 +141,9 @@ func (s *sum[N]) cumulative(dest *metricdata.Aggregation) int {
 }
 
 // newPrecomputedSum returns an aggregator that summarizes a set of
-// observatrions as their arithmetic sum. Each sum is scoped by attributes and
+// observations as their arithmetic sum. Each sum is scoped by attributes and
 // the aggregation cycle the measurements were made in.
-func newPrecomputedSum[N int64 | float64](monotonic bool, limit int, r func() exemplar.FilteredReservoir[N]) *precomputedSum[N] {
+func newPrecomputedSum[N int64 | float64](monotonic bool, limit int, r func(attribute.Set) FilteredExemplarReservoir[N]) *precomputedSum[N] {
 	return &precomputedSum[N]{
 		valueMap:  newValueMap[N](limit, r),
 		monotonic: monotonic,
@@ -152,7 +151,7 @@ func newPrecomputedSum[N int64 | float64](monotonic bool, limit int, r func() ex
 	}
 }
 
-// precomputedSum summarizes a set of observatrions as their arithmetic sum.
+// precomputedSum summarizes a set of observations as their arithmetic sum.
 type precomputedSum[N int64 | float64] struct {
 	*valueMap[N]
 
diff --git a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/exemplar/drop.go b/vendor/go.opentelemetry.io/otel/sdk/metric/internal/exemplar/drop.go
deleted file mode 100644
index 5a0f39ae..00000000
--- a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/exemplar/drop.go
+++ /dev/null
@@ -1,23 +0,0 @@
-// Copyright The OpenTelemetry Authors
-// SPDX-License-Identifier: Apache-2.0
-
-package exemplar // import "go.opentelemetry.io/otel/sdk/metric/internal/exemplar"
-
-import (
-	"context"
-
-	"go.opentelemetry.io/otel/attribute"
-)
-
-// Drop returns a [FilteredReservoir] that drops all measurements it is offered.
-func Drop[N int64 | float64]() FilteredReservoir[N] { return &dropRes[N]{} }
-
-type dropRes[N int64 | float64] struct{}
-
-// Offer does nothing, all measurements offered will be dropped.
-func (r *dropRes[N]) Offer(context.Context, N, []attribute.KeyValue) {}
-
-// Collect resets dest. No exemplars will ever be returned.
-func (r *dropRes[N]) Collect(dest *[]Exemplar) {
-	*dest = (*dest)[:0]
-}
diff --git a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/exemplar/filtered_reservoir.go b/vendor/go.opentelemetry.io/otel/sdk/metric/internal/exemplar/filtered_reservoir.go
deleted file mode 100644
index 9fedfa4b..00000000
--- a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/exemplar/filtered_reservoir.go
+++ /dev/null
@@ -1,49 +0,0 @@
-// Copyright The OpenTelemetry Authors
-// SPDX-License-Identifier: Apache-2.0
-
-package exemplar // import "go.opentelemetry.io/otel/sdk/metric/internal/exemplar"
-
-import (
-	"context"
-	"time"
-
-	"go.opentelemetry.io/otel/attribute"
-)
-
-// FilteredReservoir wraps a [Reservoir] with a filter.
-type FilteredReservoir[N int64 | float64] interface {
-	// Offer accepts the parameters associated with a measurement. The
-	// parameters will be stored as an exemplar if the filter decides to
-	// sample the measurement.
-	//
-	// The passed ctx needs to contain any baggage or span that were active
-	// when the measurement was made. This information may be used by the
-	// Reservoir in making a sampling decision.
-	Offer(ctx context.Context, val N, attr []attribute.KeyValue)
-	// Collect returns all the held exemplars in the reservoir.
-	Collect(dest *[]Exemplar)
-}
-
-// filteredReservoir handles the pre-sampled exemplar of measurements made.
-type filteredReservoir[N int64 | float64] struct {
-	filter    Filter
-	reservoir Reservoir
-}
-
-// NewFilteredReservoir creates a [FilteredReservoir] which only offers values
-// that are allowed by the filter.
-func NewFilteredReservoir[N int64 | float64](f Filter, r Reservoir) FilteredReservoir[N] {
-	return &filteredReservoir[N]{
-		filter:    f,
-		reservoir: r,
-	}
-}
-
-func (f *filteredReservoir[N]) Offer(ctx context.Context, val N, attr []attribute.KeyValue) {
-	if f.filter(ctx) {
-		// only record the current time if we are sampling this measurment.
-		f.reservoir.Offer(ctx, time.Now(), NewValue(val), attr)
-	}
-}
-
-func (f *filteredReservoir[N]) Collect(dest *[]Exemplar) { f.reservoir.Collect(dest) }
diff --git a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/exemplar/hist.go b/vendor/go.opentelemetry.io/otel/sdk/metric/internal/exemplar/hist.go
deleted file mode 100644
index a6ff86d0..00000000
--- a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/exemplar/hist.go
+++ /dev/null
@@ -1,46 +0,0 @@
-// Copyright The OpenTelemetry Authors
-// SPDX-License-Identifier: Apache-2.0
-
-package exemplar // import "go.opentelemetry.io/otel/sdk/metric/internal/exemplar"
-
-import (
-	"context"
-	"slices"
-	"sort"
-	"time"
-
-	"go.opentelemetry.io/otel/attribute"
-)
-
-// Histogram returns a [Reservoir] that samples the last measurement that falls
-// within a histogram bucket. The histogram bucket upper-boundaries are define
-// by bounds.
-//
-// The passed bounds will be sorted by this function.
-func Histogram(bounds []float64) Reservoir {
-	slices.Sort(bounds)
-	return &histRes{
-		bounds:  bounds,
-		storage: newStorage(len(bounds) + 1),
-	}
-}
-
-type histRes struct {
-	*storage
-
-	// bounds are bucket bounds in ascending order.
-	bounds []float64
-}
-
-func (r *histRes) Offer(ctx context.Context, t time.Time, v Value, a []attribute.KeyValue) {
-	var x float64
-	switch v.Type() {
-	case Int64ValueType:
-		x = float64(v.Int64())
-	case Float64ValueType:
-		x = v.Float64()
-	default:
-		panic("unknown value type")
-	}
-	r.store[sort.SearchFloat64s(r.bounds, x)] = newMeasurement(ctx, t, v, a)
-}
diff --git a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/x/x.go b/vendor/go.opentelemetry.io/otel/sdk/metric/internal/x/x.go
index 8cd2f374..08919937 100644
--- a/vendor/go.opentelemetry.io/otel/sdk/metric/internal/x/x.go
+++ b/vendor/go.opentelemetry.io/otel/sdk/metric/internal/x/x.go
@@ -10,39 +10,23 @@ package x // import "go.opentelemetry.io/otel/sdk/metric/internal/x"
 import (
 	"os"
 	"strconv"
-	"strings"
 )
 
-var (
-	// Exemplars is an experimental feature flag that defines if exemplars
-	// should be recorded for metric data-points.
-	//
-	// To enable this feature set the OTEL_GO_X_EXEMPLAR environment variable
-	// to the case-insensitive string value of "true" (i.e. "True" and "TRUE"
-	// will also enable this).
-	Exemplars = newFeature("EXEMPLAR", func(v string) (string, bool) {
-		if strings.ToLower(v) == "true" {
-			return v, true
-		}
-		return "", false
-	})
-
-	// CardinalityLimit is an experimental feature flag that defines if
-	// cardinality limits should be applied to the recorded metric data-points.
-	//
-	// To enable this feature set the OTEL_GO_X_CARDINALITY_LIMIT environment
-	// variable to the integer limit value you want to use.
-	//
-	// Setting OTEL_GO_X_CARDINALITY_LIMIT to a value less than or equal to 0
-	// will disable the cardinality limits.
-	CardinalityLimit = newFeature("CARDINALITY_LIMIT", func(v string) (int, bool) {
-		n, err := strconv.Atoi(v)
-		if err != nil {
-			return 0, false
-		}
-		return n, true
-	})
-)
+// CardinalityLimit is an experimental feature flag that defines if
+// cardinality limits should be applied to the recorded metric data-points.
+//
+// To enable this feature set the OTEL_GO_X_CARDINALITY_LIMIT environment
+// variable to the integer limit value you want to use.
+//
+// Setting OTEL_GO_X_CARDINALITY_LIMIT to a value less than or equal to 0
+// will disable the cardinality limits.
+var CardinalityLimit = newFeature("CARDINALITY_LIMIT", func(v string) (int, bool) {
+	n, err := strconv.Atoi(v)
+	if err != nil {
+		return 0, false
+	}
+	return n, true
+})
 
 // Feature is an experimental feature control flag. It provides a uniform way
 // to interact with these feature flags and parse their values.
diff --git a/vendor/go.opentelemetry.io/otel/sdk/metric/manual_reader.go b/vendor/go.opentelemetry.io/otel/sdk/metric/manual_reader.go
index e0fd86ca..c495985b 100644
--- a/vendor/go.opentelemetry.io/otel/sdk/metric/manual_reader.go
+++ b/vendor/go.opentelemetry.io/otel/sdk/metric/manual_reader.go
@@ -113,18 +113,17 @@ func (mr *ManualReader) Collect(ctx context.Context, rm *metricdata.ResourceMetr
 	if err != nil {
 		return err
 	}
-	var errs []error
 	for _, producer := range mr.externalProducers.Load().([]Producer) {
-		externalMetrics, err := producer.Produce(ctx)
-		if err != nil {
-			errs = append(errs, err)
+		externalMetrics, e := producer.Produce(ctx)
+		if e != nil {
+			err = errors.Join(err, e)
 		}
 		rm.ScopeMetrics = append(rm.ScopeMetrics, externalMetrics...)
 	}
 
 	global.Debug("ManualReader collection", "Data", rm)
 
-	return unifyErrors(errs)
+	return err
 }
 
 // MarshalLog returns logging data about the ManualReader.
diff --git a/vendor/go.opentelemetry.io/otel/sdk/metric/meter.go b/vendor/go.opentelemetry.io/otel/sdk/metric/meter.go
index 2309e5b2..823cdf2c 100644
--- a/vendor/go.opentelemetry.io/otel/sdk/metric/meter.go
+++ b/vendor/go.opentelemetry.io/otel/sdk/metric/meter.go
@@ -150,6 +150,11 @@ func (m *meter) int64ObservableInstrument(id Instrument, callbacks []metric.Int6
 				continue
 			}
 			inst.appendMeasures(in)
+
+			// Add the measures to the pipeline. It is required to maintain
+			// measures per pipeline to avoid calling the measure that
+			// is not part of the pipeline.
+			insert.pipeline.addInt64Measure(inst.observableID, in)
 			for _, cback := range callbacks {
 				inst := int64Observer{measures: in}
 				fn := cback
@@ -309,6 +314,11 @@ func (m *meter) float64ObservableInstrument(id Instrument, callbacks []metric.Fl
 				continue
 			}
 			inst.appendMeasures(in)
+
+			// Add the measures to the pipeline. It is required to maintain
+			// measures per pipeline to avoid calling the measure that
+			// is not part of the pipeline.
+			insert.pipeline.addFloat64Measure(inst.observableID, in)
 			for _, cback := range callbacks {
 				inst := float64Observer{measures: in}
 				fn := cback
@@ -441,73 +451,80 @@ func (m *meter) RegisterCallback(f metric.Callback, insts ...metric.Observable)
 		return noopRegister{}, nil
 	}
 
-	reg := newObserver()
-	var errs multierror
+	var err error
+	validInstruments := make([]metric.Observable, 0, len(insts))
 	for _, inst := range insts {
-		// Unwrap any global.
-		if u, ok := inst.(interface {
-			Unwrap() metric.Observable
-		}); ok {
-			inst = u.Unwrap()
-		}
-
 		switch o := inst.(type) {
 		case int64Observable:
-			if err := o.registerable(m); err != nil {
-				if !errors.Is(err, errEmptyAgg) {
-					errs.append(err)
+			if e := o.registerable(m); e != nil {
+				if !errors.Is(e, errEmptyAgg) {
+					err = errors.Join(err, e)
 				}
 				continue
 			}
-			reg.registerInt64(o.observablID)
+
+			validInstruments = append(validInstruments, inst)
 		case float64Observable:
-			if err := o.registerable(m); err != nil {
-				if !errors.Is(err, errEmptyAgg) {
-					errs.append(err)
+			if e := o.registerable(m); e != nil {
+				if !errors.Is(e, errEmptyAgg) {
+					err = errors.Join(err, e)
 				}
 				continue
 			}
-			reg.registerFloat64(o.observablID)
+
+			validInstruments = append(validInstruments, inst)
 		default:
 			// Instrument external to the SDK.
 			return nil, fmt.Errorf("invalid observable: from different implementation")
 		}
 	}
 
-	err := errs.errorOrNil()
-	if reg.len() == 0 {
+	if len(validInstruments) == 0 {
 		// All insts use drop aggregation or are invalid.
 		return noopRegister{}, err
 	}
 
-	// Some or all instruments were valid.
-	cback := func(ctx context.Context) error { return f(ctx, reg) }
-	return m.pipes.registerMultiCallback(cback), err
+	unregs := make([]func(), len(m.pipes))
+	for ix, pipe := range m.pipes {
+		reg := newObserver(pipe)
+		for _, inst := range validInstruments {
+			switch o := inst.(type) {
+			case int64Observable:
+				reg.registerInt64(o.observableID)
+			case float64Observable:
+				reg.registerFloat64(o.observableID)
+			}
+		}
+
+		// Some or all instruments were valid.
+		cBack := func(ctx context.Context) error { return f(ctx, reg) }
+		unregs[ix] = pipe.addMultiCallback(cBack)
+	}
+
+	return unregisterFuncs{f: unregs}, err
 }
 
 type observer struct {
 	embedded.Observer
 
-	float64 map[observablID[float64]]struct{}
-	int64   map[observablID[int64]]struct{}
+	pipe    *pipeline
+	float64 map[observableID[float64]]struct{}
+	int64   map[observableID[int64]]struct{}
 }
 
-func newObserver() observer {
+func newObserver(p *pipeline) observer {
 	return observer{
-		float64: make(map[observablID[float64]]struct{}),
-		int64:   make(map[observablID[int64]]struct{}),
+		pipe:    p,
+		float64: make(map[observableID[float64]]struct{}),
+		int64:   make(map[observableID[int64]]struct{}),
 	}
 }
 
-func (r observer) len() int {
-	return len(r.float64) + len(r.int64)
-}
-
-func (r observer) registerFloat64(id observablID[float64]) {
+func (r observer) registerFloat64(id observableID[float64]) {
 	r.float64[id] = struct{}{}
 }
 
-func (r observer) registerInt64(id observablID[int64]) {
+func (r observer) registerInt64(id observableID[int64]) {
 	r.int64[id] = struct{}{}
 }
 
@@ -521,22 +538,12 @@ func (r observer) ObserveFloat64(o metric.Float64Observable, v float64, opts ...
 	switch conv := o.(type) {
 	case float64Observable:
 		oImpl = conv
-	case interface {
-		Unwrap() metric.Observable
-	}:
-		// Unwrap any global.
-		async := conv.Unwrap()
-		var ok bool
-		if oImpl, ok = async.(float64Observable); !ok {
-			global.Error(errUnknownObserver, "failed to record asynchronous")
-			return
-		}
 	default:
 		global.Error(errUnknownObserver, "failed to record")
 		return
 	}
 
-	if _, registered := r.float64[oImpl.observablID]; !registered {
+	if _, registered := r.float64[oImpl.observableID]; !registered {
 		if !oImpl.dropAggregation {
 			global.Error(errUnregObserver, "failed to record",
 				"name", oImpl.name,
@@ -548,7 +555,12 @@ func (r observer) ObserveFloat64(o metric.Float64Observable, v float64, opts ...
 		return
 	}
 	c := metric.NewObserveConfig(opts)
-	oImpl.observe(v, c.Attributes())
+	// Access to r.pipe.float64Measures is already guarded by a lock in pipeline.produce.
+	// TODO (#5946): Refactor pipeline and observable measures.
+	measures := r.pipe.float64Measures[oImpl.observableID]
+	for _, m := range measures {
+		m(context.Background(), v, c.Attributes())
+	}
 }
 
 func (r observer) ObserveInt64(o metric.Int64Observable, v int64, opts ...metric.ObserveOption) {
@@ -556,22 +568,12 @@ func (r observer) ObserveInt64(o metric.Int64Observable, v int64, opts ...metric
 	switch conv := o.(type) {
 	case int64Observable:
 		oImpl = conv
-	case interface {
-		Unwrap() metric.Observable
-	}:
-		// Unwrap any global.
-		async := conv.Unwrap()
-		var ok bool
-		if oImpl, ok = async.(int64Observable); !ok {
-			global.Error(errUnknownObserver, "failed to record asynchronous")
-			return
-		}
 	default:
 		global.Error(errUnknownObserver, "failed to record")
 		return
 	}
 
-	if _, registered := r.int64[oImpl.observablID]; !registered {
+	if _, registered := r.int64[oImpl.observableID]; !registered {
 		if !oImpl.dropAggregation {
 			global.Error(errUnregObserver, "failed to record",
 				"name", oImpl.name,
@@ -583,7 +585,12 @@ func (r observer) ObserveInt64(o metric.Int64Observable, v int64, opts ...metric
 		return
 	}
 	c := metric.NewObserveConfig(opts)
-	oImpl.observe(v, c.Attributes())
+	// Access to r.pipe.int64Measures is already guarded by a lock in pipeline.produce.
+	// TODO (#5946): Refactor pipeline and observable measures.
+	measures := r.pipe.int64Measures[oImpl.observableID]
+	for _, m := range measures {
+		m(context.Background(), v, c.Attributes())
+	}
 }
 
 type noopRegister struct{ embedded.Registration }
diff --git a/vendor/go.opentelemetry.io/otel/sdk/metric/periodic_reader.go b/vendor/go.opentelemetry.io/otel/sdk/metric/periodic_reader.go
index 67ee1b11..dcd2182d 100644
--- a/vendor/go.opentelemetry.io/otel/sdk/metric/periodic_reader.go
+++ b/vendor/go.opentelemetry.io/otel/sdk/metric/periodic_reader.go
@@ -251,18 +251,17 @@ func (r *PeriodicReader) collect(ctx context.Context, p interface{}, rm *metricd
 	if err != nil {
 		return err
 	}
-	var errs []error
 	for _, producer := range r.externalProducers.Load().([]Producer) {
-		externalMetrics, err := producer.Produce(ctx)
-		if err != nil {
-			errs = append(errs, err)
+		externalMetrics, e := producer.Produce(ctx)
+		if e != nil {
+			err = errors.Join(err, e)
 		}
 		rm.ScopeMetrics = append(rm.ScopeMetrics, externalMetrics...)
 	}
 
 	global.Debug("PeriodicReader collection", "Data", rm)
 
-	return unifyErrors(errs)
+	return err
 }
 
 // export exports metric data m using r's exporter.
diff --git a/vendor/go.opentelemetry.io/otel/sdk/metric/pipeline.go b/vendor/go.opentelemetry.io/otel/sdk/metric/pipeline.go
index 823bf2fe..775e2452 100644
--- a/vendor/go.opentelemetry.io/otel/sdk/metric/pipeline.go
+++ b/vendor/go.opentelemetry.io/otel/sdk/metric/pipeline.go
@@ -8,14 +8,13 @@ import (
 	"context"
 	"errors"
 	"fmt"
-	"strings"
 	"sync"
 	"sync/atomic"
 
 	"go.opentelemetry.io/otel/internal/global"
-	"go.opentelemetry.io/otel/metric"
 	"go.opentelemetry.io/otel/metric/embedded"
 	"go.opentelemetry.io/otel/sdk/instrumentation"
+	"go.opentelemetry.io/otel/sdk/metric/exemplar"
 	"go.opentelemetry.io/otel/sdk/metric/internal"
 	"go.opentelemetry.io/otel/sdk/metric/internal/aggregate"
 	"go.opentelemetry.io/otel/sdk/metric/internal/x"
@@ -38,14 +37,17 @@ type instrumentSync struct {
 	compAgg     aggregate.ComputeAggregation
 }
 
-func newPipeline(res *resource.Resource, reader Reader, views []View) *pipeline {
+func newPipeline(res *resource.Resource, reader Reader, views []View, exemplarFilter exemplar.Filter) *pipeline {
 	if res == nil {
 		res = resource.Empty()
 	}
 	return &pipeline{
-		resource: res,
-		reader:   reader,
-		views:    views,
+		resource:        res,
+		reader:          reader,
+		views:           views,
+		int64Measures:   map[observableID[int64]][]aggregate.Measure[int64]{},
+		float64Measures: map[observableID[float64]][]aggregate.Measure[float64]{},
+		exemplarFilter:  exemplarFilter,
 		// aggregations is lazy allocated when needed.
 	}
 }
@@ -63,9 +65,26 @@ type pipeline struct {
 	views  []View
 
 	sync.Mutex
-	aggregations   map[instrumentation.Scope][]instrumentSync
-	callbacks      []func(context.Context) error
-	multiCallbacks list.List
+	int64Measures   map[observableID[int64]][]aggregate.Measure[int64]
+	float64Measures map[observableID[float64]][]aggregate.Measure[float64]
+	aggregations    map[instrumentation.Scope][]instrumentSync
+	callbacks       []func(context.Context) error
+	multiCallbacks  list.List
+	exemplarFilter  exemplar.Filter
+}
+
+// addInt64Measure adds a new int64 measure to the pipeline for each observer.
+func (p *pipeline) addInt64Measure(id observableID[int64], m []aggregate.Measure[int64]) {
+	p.Lock()
+	defer p.Unlock()
+	p.int64Measures[id] = m
+}
+
+// addFloat64Measure adds a new float64 measure to the pipeline for each observer.
+func (p *pipeline) addFloat64Measure(id observableID[float64], m []aggregate.Measure[float64]) {
+	p.Lock()
+	defer p.Unlock()
+	p.float64Measures[id] = m
 }
 
 // addSync adds the instrumentSync to pipeline p with scope. This method is not
@@ -105,14 +124,15 @@ func (p *pipeline) produce(ctx context.Context, rm *metricdata.ResourceMetrics)
 	p.Lock()
 	defer p.Unlock()
 
-	var errs multierror
+	var err error
 	for _, c := range p.callbacks {
 		// TODO make the callbacks parallel. ( #3034 )
-		if err := c(ctx); err != nil {
-			errs.append(err)
+		if e := c(ctx); e != nil {
+			err = errors.Join(err, e)
 		}
 		if err := ctx.Err(); err != nil {
 			rm.Resource = nil
+			clear(rm.ScopeMetrics) // Erase elements to let GC collect objects.
 			rm.ScopeMetrics = rm.ScopeMetrics[:0]
 			return err
 		}
@@ -120,12 +140,13 @@ func (p *pipeline) produce(ctx context.Context, rm *metricdata.ResourceMetrics)
 	for e := p.multiCallbacks.Front(); e != nil; e = e.Next() {
 		// TODO make the callbacks parallel. ( #3034 )
 		f := e.Value.(multiCallback)
-		if err := f(ctx); err != nil {
-			errs.append(err)
+		if e := f(ctx); e != nil {
+			err = errors.Join(err, e)
 		}
 		if err := ctx.Err(); err != nil {
 			// This means the context expired before we finished running callbacks.
 			rm.Resource = nil
+			clear(rm.ScopeMetrics) // Erase elements to let GC collect objects.
 			rm.ScopeMetrics = rm.ScopeMetrics[:0]
 			return err
 		}
@@ -157,7 +178,7 @@ func (p *pipeline) produce(ctx context.Context, rm *metricdata.ResourceMetrics)
 
 	rm.ScopeMetrics = rm.ScopeMetrics[:i]
 
-	return errs.errorOrNil()
+	return err
 }
 
 // inserter facilitates inserting of new instruments from a single scope into a
@@ -219,7 +240,7 @@ func (i *inserter[N]) Instrument(inst Instrument, readerAggregation Aggregation)
 		measures []aggregate.Measure[N]
 	)
 
-	errs := &multierror{wrapped: errCreatingAggregators}
+	var err error
 	seen := make(map[uint64]struct{})
 	for _, v := range i.pipeline.views {
 		stream, match := v(inst)
@@ -227,9 +248,9 @@ func (i *inserter[N]) Instrument(inst Instrument, readerAggregation Aggregation)
 			continue
 		}
 		matched = true
-		in, id, err := i.cachedAggregator(inst.Scope, inst.Kind, stream, readerAggregation)
-		if err != nil {
-			errs.append(err)
+		in, id, e := i.cachedAggregator(inst.Scope, inst.Kind, stream, readerAggregation)
+		if e != nil {
+			err = errors.Join(err, e)
 		}
 		if in == nil { // Drop aggregation.
 			continue
@@ -242,8 +263,12 @@ func (i *inserter[N]) Instrument(inst Instrument, readerAggregation Aggregation)
 		measures = append(measures, in)
 	}
 
+	if err != nil {
+		err = errors.Join(errCreatingAggregators, err)
+	}
+
 	if matched {
-		return measures, errs.errorOrNil()
+		return measures, err
 	}
 
 	// Apply implicit default view if no explicit matched.
@@ -252,15 +277,18 @@ func (i *inserter[N]) Instrument(inst Instrument, readerAggregation Aggregation)
 		Description: inst.Description,
 		Unit:        inst.Unit,
 	}
-	in, _, err := i.cachedAggregator(inst.Scope, inst.Kind, stream, readerAggregation)
-	if err != nil {
-		errs.append(err)
+	in, _, e := i.cachedAggregator(inst.Scope, inst.Kind, stream, readerAggregation)
+	if e != nil {
+		if err == nil {
+			err = errCreatingAggregators
+		}
+		err = errors.Join(err, e)
 	}
 	if in != nil {
 		// Ensured to have not seen given matched was false.
 		measures = append(measures, in)
 	}
-	return measures, errs.errorOrNil()
+	return measures, err
 }
 
 // addCallback registers a single instrument callback to be run when
@@ -329,6 +357,9 @@ func (i *inserter[N]) cachedAggregator(scope instrumentation.Scope, kind Instrum
 		// The view explicitly requested the default aggregation.
 		stream.Aggregation = DefaultAggregationSelector(kind)
 	}
+	if stream.ExemplarReservoirProviderSelector == nil {
+		stream.ExemplarReservoirProviderSelector = DefaultExemplarReservoirProviderSelector
+	}
 
 	if err := isAggregatorCompatible(kind, stream.Aggregation); err != nil {
 		return nil, 0, fmt.Errorf(
@@ -349,7 +380,7 @@ func (i *inserter[N]) cachedAggregator(scope instrumentation.Scope, kind Instrum
 	cv := i.aggregators.Lookup(normID, func() aggVal[N] {
 		b := aggregate.Builder[N]{
 			Temporality:   i.pipeline.reader.temporality(kind),
-			ReservoirFunc: reservoirFunc[N](stream.Aggregation),
+			ReservoirFunc: reservoirFunc[N](stream.ExemplarReservoirProviderSelector(stream.Aggregation), i.pipeline.exemplarFilter),
 		}
 		b.Filter = stream.AttributeFilter
 		// A value less than or equal to zero will disable the aggregation
@@ -552,24 +583,16 @@ func isAggregatorCompatible(kind InstrumentKind, agg Aggregation) error {
 // measurement.
 type pipelines []*pipeline
 
-func newPipelines(res *resource.Resource, readers []Reader, views []View) pipelines {
+func newPipelines(res *resource.Resource, readers []Reader, views []View, exemplarFilter exemplar.Filter) pipelines {
 	pipes := make([]*pipeline, 0, len(readers))
 	for _, r := range readers {
-		p := newPipeline(res, r, views)
+		p := newPipeline(res, r, views, exemplarFilter)
 		r.register(p)
 		pipes = append(pipes, p)
 	}
 	return pipes
 }
 
-func (p pipelines) registerMultiCallback(c multiCallback) metric.Registration {
-	unregs := make([]func(), len(p))
-	for i, pipe := range p {
-		unregs[i] = pipe.addMultiCallback(c)
-	}
-	return unregisterFuncs{f: unregs}
-}
-
 type unregisterFuncs struct {
 	embedded.Registration
 	f []func()
@@ -602,15 +625,15 @@ func newResolver[N int64 | float64](p pipelines, vc *cache[string, instID]) reso
 func (r resolver[N]) Aggregators(id Instrument) ([]aggregate.Measure[N], error) {
 	var measures []aggregate.Measure[N]
 
-	errs := &multierror{}
+	var err error
 	for _, i := range r.inserters {
-		in, err := i.Instrument(id, i.readerDefaultAggregation(id.Kind))
-		if err != nil {
-			errs.append(err)
+		in, e := i.Instrument(id, i.readerDefaultAggregation(id.Kind))
+		if e != nil {
+			err = errors.Join(err, e)
 		}
 		measures = append(measures, in...)
 	}
-	return measures, errs.errorOrNil()
+	return measures, err
 }
 
 // HistogramAggregators returns the histogram Aggregators that must be updated by the instrument
@@ -619,37 +642,18 @@ func (r resolver[N]) Aggregators(id Instrument) ([]aggregate.Measure[N], error)
 func (r resolver[N]) HistogramAggregators(id Instrument, boundaries []float64) ([]aggregate.Measure[N], error) {
 	var measures []aggregate.Measure[N]
 
-	errs := &multierror{}
+	var err error
 	for _, i := range r.inserters {
 		agg := i.readerDefaultAggregation(id.Kind)
 		if histAgg, ok := agg.(AggregationExplicitBucketHistogram); ok && len(boundaries) > 0 {
 			histAgg.Boundaries = boundaries
 			agg = histAgg
 		}
-		in, err := i.Instrument(id, agg)
-		if err != nil {
-			errs.append(err)
+		in, e := i.Instrument(id, agg)
+		if e != nil {
+			err = errors.Join(err, e)
 		}
 		measures = append(measures, in...)
 	}
-	return measures, errs.errorOrNil()
-}
-
-type multierror struct {
-	wrapped error
-	errors  []string
-}
-
-func (m *multierror) errorOrNil() error {
-	if len(m.errors) == 0 {
-		return nil
-	}
-	if m.wrapped == nil {
-		return errors.New(strings.Join(m.errors, "; "))
-	}
-	return fmt.Errorf("%w: %s", m.wrapped, strings.Join(m.errors, "; "))
-}
-
-func (m *multierror) append(err error) {
-	m.errors = append(m.errors, err.Error())
+	return measures, err
 }
diff --git a/vendor/go.opentelemetry.io/otel/sdk/metric/provider.go b/vendor/go.opentelemetry.io/otel/sdk/metric/provider.go
index a82af538..2fca89e5 100644
--- a/vendor/go.opentelemetry.io/otel/sdk/metric/provider.go
+++ b/vendor/go.opentelemetry.io/otel/sdk/metric/provider.go
@@ -42,7 +42,7 @@ func NewMeterProvider(options ...Option) *MeterProvider {
 	flush, sdown := conf.readerSignals()
 
 	mp := &MeterProvider{
-		pipes:      newPipelines(conf.res, conf.readers, conf.views),
+		pipes:      newPipelines(conf.res, conf.readers, conf.views, conf.exemplarFilter),
 		forceFlush: flush,
 		shutdown:   sdown,
 	}
@@ -76,15 +76,17 @@ func (mp *MeterProvider) Meter(name string, options ...metric.MeterOption) metri
 
 	c := metric.NewMeterConfig(options...)
 	s := instrumentation.Scope{
-		Name:      name,
-		Version:   c.InstrumentationVersion(),
-		SchemaURL: c.SchemaURL(),
+		Name:       name,
+		Version:    c.InstrumentationVersion(),
+		SchemaURL:  c.SchemaURL(),
+		Attributes: c.InstrumentationAttributes(),
 	}
 
 	global.Info("Meter created",
 		"Name", s.Name,
 		"Version", s.Version,
 		"SchemaURL", s.SchemaURL,
+		"Attributes", s.Attributes,
 	)
 
 	return mp.meters.Lookup(s, func() *meter {
diff --git a/vendor/go.opentelemetry.io/otel/sdk/metric/version.go b/vendor/go.opentelemetry.io/otel/sdk/metric/version.go
index 44316caa..6347060b 100644
--- a/vendor/go.opentelemetry.io/otel/sdk/metric/version.go
+++ b/vendor/go.opentelemetry.io/otel/sdk/metric/version.go
@@ -5,5 +5,5 @@ package metric // import "go.opentelemetry.io/otel/sdk/metric"
 
 // version is the current release version of the metric SDK in use.
 func version() string {
-	return "1.29.0"
+	return "1.32.0"
 }
diff --git a/vendor/go.opentelemetry.io/otel/sdk/metric/view.go b/vendor/go.opentelemetry.io/otel/sdk/metric/view.go
index cd08c673..630890f4 100644
--- a/vendor/go.opentelemetry.io/otel/sdk/metric/view.go
+++ b/vendor/go.opentelemetry.io/otel/sdk/metric/view.go
@@ -96,11 +96,12 @@ func NewView(criteria Instrument, mask Stream) View {
 	return func(i Instrument) (Stream, bool) {
 		if matchFunc(i) {
 			return Stream{
-				Name:            nonZero(mask.Name, i.Name),
-				Description:     nonZero(mask.Description, i.Description),
-				Unit:            nonZero(mask.Unit, i.Unit),
-				Aggregation:     agg,
-				AttributeFilter: mask.AttributeFilter,
+				Name:                              nonZero(mask.Name, i.Name),
+				Description:                       nonZero(mask.Description, i.Description),
+				Unit:                              nonZero(mask.Unit, i.Unit),
+				Aggregation:                       agg,
+				AttributeFilter:                   mask.AttributeFilter,
+				ExemplarReservoirProviderSelector: mask.ExemplarReservoirProviderSelector,
 			}, true
 		}
 		return Stream{}, false
diff --git a/vendor/go.opentelemetry.io/otel/sdk/resource/auto.go b/vendor/go.opentelemetry.io/otel/sdk/resource/auto.go
index 95a61d61..c02aeefd 100644
--- a/vendor/go.opentelemetry.io/otel/sdk/resource/auto.go
+++ b/vendor/go.opentelemetry.io/otel/sdk/resource/auto.go
@@ -7,7 +7,6 @@ import (
 	"context"
 	"errors"
 	"fmt"
-	"strings"
 )
 
 // ErrPartialResource is returned by a detector when complete source
@@ -57,62 +56,37 @@ func Detect(ctx context.Context, detectors ...Detector) (*Resource, error) {
 // these errors will be returned. Otherwise, nil is returned.
 func detect(ctx context.Context, res *Resource, detectors []Detector) error {
 	var (
-		r    *Resource
-		errs detectErrs
-		err  error
+		r   *Resource
+		err error
+		e   error
 	)
 
 	for _, detector := range detectors {
 		if detector == nil {
 			continue
 		}
-		r, err = detector.Detect(ctx)
-		if err != nil {
-			errs = append(errs, err)
-			if !errors.Is(err, ErrPartialResource) {
+		r, e = detector.Detect(ctx)
+		if e != nil {
+			err = errors.Join(err, e)
+			if !errors.Is(e, ErrPartialResource) {
 				continue
 			}
 		}
-		r, err = Merge(res, r)
-		if err != nil {
-			errs = append(errs, err)
+		r, e = Merge(res, r)
+		if e != nil {
+			err = errors.Join(err, e)
 		}
 		*res = *r
 	}
 
-	if len(errs) == 0 {
-		return nil
-	}
-	if errors.Is(errs, ErrSchemaURLConflict) {
-		// If there has been a merge conflict, ensure the resource has no
-		// schema URL.
-		res.schemaURL = ""
-	}
-	return errs
-}
-
-type detectErrs []error
-
-func (e detectErrs) Error() string {
-	errStr := make([]string, len(e))
-	for i, err := range e {
-		errStr[i] = fmt.Sprintf("* %s", err)
-	}
-
-	format := "%d errors occurred detecting resource:\n\t%s"
-	return fmt.Sprintf(format, len(e), strings.Join(errStr, "\n\t"))
-}
+	if err != nil {
+		if errors.Is(err, ErrSchemaURLConflict) {
+			// If there has been a merge conflict, ensure the resource has no
+			// schema URL.
+			res.schemaURL = ""
+		}
 
-func (e detectErrs) Unwrap() error {
-	switch len(e) {
-	case 0:
-		return nil
-	case 1:
-		return e[0]
+		err = fmt.Errorf("error detecting resource: %w", err)
 	}
-	return e[1:]
-}
-
-func (e detectErrs) Is(target error) bool {
-	return len(e) != 0 && errors.Is(e[0], target)
+	return err
 }
diff --git a/vendor/go.opentelemetry.io/otel/sdk/resource/builtin.go b/vendor/go.opentelemetry.io/otel/sdk/resource/builtin.go
index 6ac1cdbf..cf3c88e1 100644
--- a/vendor/go.opentelemetry.io/otel/sdk/resource/builtin.go
+++ b/vendor/go.opentelemetry.io/otel/sdk/resource/builtin.go
@@ -20,15 +20,13 @@ type (
 	// telemetrySDK is a Detector that provides information about
 	// the OpenTelemetry SDK used.  This Detector is included as a
 	// builtin. If these resource attributes are not wanted, use
-	// the WithTelemetrySDK(nil) or WithoutBuiltin() options to
-	// explicitly disable them.
+	// resource.New() to explicitly disable them.
 	telemetrySDK struct{}
 
 	// host is a Detector that provides information about the host
 	// being run on. This Detector is included as a builtin. If
 	// these resource attributes are not wanted, use the
-	// WithHost(nil) or WithoutBuiltin() options to explicitly
-	// disable them.
+	// resource.New() to explicitly disable them.
 	host struct{}
 
 	stringDetector struct {
diff --git a/vendor/go.opentelemetry.io/otel/sdk/resource/host_id_windows.go b/vendor/go.opentelemetry.io/otel/sdk/resource/host_id_windows.go
index 71386e2d..3677c83d 100644
--- a/vendor/go.opentelemetry.io/otel/sdk/resource/host_id_windows.go
+++ b/vendor/go.opentelemetry.io/otel/sdk/resource/host_id_windows.go
@@ -10,17 +10,16 @@ import (
 	"golang.org/x/sys/windows/registry"
 )
 
-// implements hostIDReader
+// implements hostIDReader.
 type hostIDReaderWindows struct{}
 
-// read reads MachineGuid from the windows registry key:
-// SOFTWARE\Microsoft\Cryptography
+// read reads MachineGuid from the Windows registry key:
+// SOFTWARE\Microsoft\Cryptography.
 func (*hostIDReaderWindows) read() (string, error) {
 	k, err := registry.OpenKey(
 		registry.LOCAL_MACHINE, `SOFTWARE\Microsoft\Cryptography`,
 		registry.QUERY_VALUE|registry.WOW64_64KEY,
 	)
-
 	if err != nil {
 		return "", err
 	}
diff --git a/vendor/go.opentelemetry.io/otel/sdk/resource/os_windows.go b/vendor/go.opentelemetry.io/otel/sdk/resource/os_windows.go
index 5e3d199d..a6a5a53c 100644
--- a/vendor/go.opentelemetry.io/otel/sdk/resource/os_windows.go
+++ b/vendor/go.opentelemetry.io/otel/sdk/resource/os_windows.go
@@ -17,7 +17,6 @@ import (
 func platformOSDescription() (string, error) {
 	k, err := registry.OpenKey(
 		registry.LOCAL_MACHINE, `SOFTWARE\Microsoft\Windows NT\CurrentVersion`, registry.QUERY_VALUE)
-
 	if err != nil {
 		return "", err
 	}
diff --git a/vendor/go.opentelemetry.io/otel/sdk/trace/batch_span_processor.go b/vendor/go.opentelemetry.io/otel/sdk/trace/batch_span_processor.go
index 1d399a75..ccc97e1b 100644
--- a/vendor/go.opentelemetry.io/otel/sdk/trace/batch_span_processor.go
+++ b/vendor/go.opentelemetry.io/otel/sdk/trace/batch_span_processor.go
@@ -280,6 +280,7 @@ func (bsp *batchSpanProcessor) exportSpans(ctx context.Context) error {
 		//
 		// It is up to the exporter to implement any type of retry logic if a batch is failing
 		// to be exported, since it is specific to the protocol and backend being sent to.
+		clear(bsp.batch) // Erase elements to let GC collect objects
 		bsp.batch = bsp.batch[:0]
 
 		if err != nil {
@@ -316,7 +317,11 @@ func (bsp *batchSpanProcessor) processQueue() {
 			bsp.batchMutex.Unlock()
 			if shouldExport {
 				if !bsp.timer.Stop() {
-					<-bsp.timer.C
+					// Handle both GODEBUG=asynctimerchan=[0|1] properly.
+					select {
+					case <-bsp.timer.C:
+					default:
+					}
 				}
 				if err := bsp.exportSpans(ctx); err != nil {
 					otel.Handle(err)
diff --git a/vendor/go.opentelemetry.io/otel/sdk/trace/evictedqueue.go b/vendor/go.opentelemetry.io/otel/sdk/trace/evictedqueue.go
index 821c83fa..8c308dd6 100644
--- a/vendor/go.opentelemetry.io/otel/sdk/trace/evictedqueue.go
+++ b/vendor/go.opentelemetry.io/otel/sdk/trace/evictedqueue.go
@@ -12,25 +12,26 @@ import (
 
 // evictedQueue is a FIFO queue with a configurable capacity.
 type evictedQueue[T any] struct {
-	queue        []T
-	capacity     int
-	droppedCount int
-	logDropped   func()
+	queue          []T
+	capacity       int
+	droppedCount   int
+	logDroppedMsg  string
+	logDroppedOnce sync.Once
 }
 
 func newEvictedQueueEvent(capacity int) evictedQueue[Event] {
 	// Do not pre-allocate queue, do this lazily.
 	return evictedQueue[Event]{
-		capacity:   capacity,
-		logDropped: sync.OnceFunc(func() { global.Warn("limit reached: dropping trace trace.Event") }),
+		capacity:      capacity,
+		logDroppedMsg: "limit reached: dropping trace trace.Event",
 	}
 }
 
 func newEvictedQueueLink(capacity int) evictedQueue[Link] {
 	// Do not pre-allocate queue, do this lazily.
 	return evictedQueue[Link]{
-		capacity:   capacity,
-		logDropped: sync.OnceFunc(func() { global.Warn("limit reached: dropping trace trace.Link") }),
+		capacity:      capacity,
+		logDroppedMsg: "limit reached: dropping trace trace.Link",
 	}
 }
 
@@ -53,6 +54,10 @@ func (eq *evictedQueue[T]) add(value T) {
 	eq.queue = append(eq.queue, value)
 }
 
+func (eq *evictedQueue[T]) logDropped() {
+	eq.logDroppedOnce.Do(func() { global.Warn(eq.logDroppedMsg) })
+}
+
 // copy returns a copy of the evictedQueue.
 func (eq *evictedQueue[T]) copy() []T {
 	return slices.Clone(eq.queue)
diff --git a/vendor/go.opentelemetry.io/otel/sdk/trace/provider.go b/vendor/go.opentelemetry.io/otel/sdk/trace/provider.go
index 14c2e5be..185aa7c0 100644
--- a/vendor/go.opentelemetry.io/otel/sdk/trace/provider.go
+++ b/vendor/go.opentelemetry.io/otel/sdk/trace/provider.go
@@ -139,9 +139,10 @@ func (p *TracerProvider) Tracer(name string, opts ...trace.TracerOption) trace.T
 		name = defaultTracerName
 	}
 	is := instrumentation.Scope{
-		Name:      name,
-		Version:   c.InstrumentationVersion(),
-		SchemaURL: c.SchemaURL(),
+		Name:       name,
+		Version:    c.InstrumentationVersion(),
+		SchemaURL:  c.SchemaURL(),
+		Attributes: c.InstrumentationAttributes(),
 	}
 
 	t, ok := func() (trace.Tracer, bool) {
@@ -168,7 +169,7 @@ func (p *TracerProvider) Tracer(name string, opts ...trace.TracerOption) trace.T
 		//   slowing down all tracing consumers.
 		// - Logging code may be instrumented with tracing and deadlock because it could try
 		//   acquiring the same non-reentrant mutex.
-		global.Info("Tracer created", "name", name, "version", is.Version, "schemaURL", is.SchemaURL)
+		global.Info("Tracer created", "name", name, "version", is.Version, "schemaURL", is.SchemaURL, "attributes", is.Attributes)
 	}
 	return t
 }
diff --git a/vendor/go.opentelemetry.io/otel/sdk/trace/span.go b/vendor/go.opentelemetry.io/otel/sdk/trace/span.go
index 4945f508..17f883c2 100644
--- a/vendor/go.opentelemetry.io/otel/sdk/trace/span.go
+++ b/vendor/go.opentelemetry.io/otel/sdk/trace/span.go
@@ -174,6 +174,17 @@ func (s *recordingSpan) IsRecording() bool {
 	s.mu.Lock()
 	defer s.mu.Unlock()
 
+	return s.isRecording()
+}
+
+// isRecording returns if this span is being recorded. If this span has ended
+// this will return false.
+//
+// This method assumes s.mu.Lock is held by the caller.
+func (s *recordingSpan) isRecording() bool {
+	if s == nil {
+		return false
+	}
 	return s.endTime.IsZero()
 }
 
@@ -182,11 +193,15 @@ func (s *recordingSpan) IsRecording() bool {
 // included in the set status when the code is for an error. If this span is
 // not being recorded than this method does nothing.
 func (s *recordingSpan) SetStatus(code codes.Code, description string) {
-	if !s.IsRecording() {
+	if s == nil {
 		return
 	}
+
 	s.mu.Lock()
 	defer s.mu.Unlock()
+	if !s.isRecording() {
+		return
+	}
 	if s.status.Code > code {
 		return
 	}
@@ -210,12 +225,15 @@ func (s *recordingSpan) SetStatus(code codes.Code, description string) {
 // attributes the span is configured to have, the last added attributes will
 // be dropped.
 func (s *recordingSpan) SetAttributes(attributes ...attribute.KeyValue) {
-	if !s.IsRecording() {
+	if s == nil || len(attributes) == 0 {
 		return
 	}
 
 	s.mu.Lock()
 	defer s.mu.Unlock()
+	if !s.isRecording() {
+		return
+	}
 
 	limit := s.tracer.provider.spanLimits.AttributeCountLimit
 	if limit == 0 {
@@ -233,7 +251,7 @@ func (s *recordingSpan) SetAttributes(attributes ...attribute.KeyValue) {
 
 	// Otherwise, add without deduplication. When attributes are read they
 	// will be deduplicated, optimizing the operation.
-	s.attributes = slices.Grow(s.attributes, len(s.attributes)+len(attributes))
+	s.attributes = slices.Grow(s.attributes, len(attributes))
 	for _, a := range attributes {
 		if !a.Valid() {
 			// Drop all invalid attributes.
@@ -280,13 +298,17 @@ func (s *recordingSpan) addOverCapAttrs(limit int, attrs []attribute.KeyValue) {
 
 	// Do not set a capacity when creating this map. Benchmark testing has
 	// showed this to only add unused memory allocations in general use.
-	exists := make(map[attribute.Key]int)
-	s.dedupeAttrsFromRecord(&exists)
+	exists := make(map[attribute.Key]int, len(s.attributes))
+	s.dedupeAttrsFromRecord(exists)
 
 	// Now that s.attributes is deduplicated, adding unique attributes up to
 	// the capacity of s will not over allocate s.attributes.
-	sum := len(attrs) + len(s.attributes)
-	s.attributes = slices.Grow(s.attributes, min(sum, limit))
+
+	// max size = limit
+	maxCap := min(len(attrs)+len(s.attributes), limit)
+	if cap(s.attributes) < maxCap {
+		s.attributes = slices.Grow(s.attributes, maxCap-cap(s.attributes))
+	}
 	for _, a := range attrs {
 		if !a.Valid() {
 			// Drop all invalid attributes.
@@ -296,6 +318,7 @@ func (s *recordingSpan) addOverCapAttrs(limit int, attrs []attribute.KeyValue) {
 
 		if idx, ok := exists[a.Key]; ok {
 			// Perform all updates before dropping, even when at capacity.
+			a = truncateAttr(s.tracer.provider.spanLimits.AttributeValueLengthLimit, a)
 			s.attributes[idx] = a
 			continue
 		}
@@ -386,9 +409,10 @@ func (s *recordingSpan) End(options ...trace.SpanEndOption) {
 	// the span's duration in case some operation below takes a while.
 	et := monotonicEndTime(s.startTime)
 
-	// Do relative expensive check now that we have an end time and see if we
-	// need to do any more processing.
-	if !s.IsRecording() {
+	// Lock the span now that we have an end time and see if we need to do any more processing.
+	s.mu.Lock()
+	if !s.isRecording() {
+		s.mu.Unlock()
 		return
 	}
 
@@ -413,10 +437,11 @@ func (s *recordingSpan) End(options ...trace.SpanEndOption) {
 	}
 
 	if s.executionTracerTaskEnd != nil {
+		s.mu.Unlock()
 		s.executionTracerTaskEnd()
+		s.mu.Lock()
 	}
 
-	s.mu.Lock()
 	// Setting endTime to non-zero marks the span as ended and not recording.
 	if config.Timestamp().IsZero() {
 		s.endTime = et
@@ -450,7 +475,13 @@ func monotonicEndTime(start time.Time) time.Time {
 // does not change the Span status. If this span is not being recorded or err is nil
 // than this method does nothing.
 func (s *recordingSpan) RecordError(err error, opts ...trace.EventOption) {
-	if s == nil || err == nil || !s.IsRecording() {
+	if s == nil || err == nil {
+		return
+	}
+
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	if !s.isRecording() {
 		return
 	}
 
@@ -486,14 +517,23 @@ func recordStackTrace() string {
 }
 
 // AddEvent adds an event with the provided name and options. If this span is
-// not being recorded than this method does nothing.
+// not being recorded then this method does nothing.
 func (s *recordingSpan) AddEvent(name string, o ...trace.EventOption) {
-	if !s.IsRecording() {
+	if s == nil {
+		return
+	}
+
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	if !s.isRecording() {
 		return
 	}
 	s.addEvent(name, o...)
 }
 
+// addEvent adds an event with the provided name and options.
+//
+// This method assumes s.mu.Lock is held by the caller.
 func (s *recordingSpan) addEvent(name string, o ...trace.EventOption) {
 	c := trace.NewEventConfig(o...)
 	e := Event{Name: name, Attributes: c.Attributes(), Time: c.Timestamp()}
@@ -510,20 +550,21 @@ func (s *recordingSpan) addEvent(name string, o ...trace.EventOption) {
 		e.Attributes = e.Attributes[:limit]
 	}
 
-	s.mu.Lock()
 	s.events.add(e)
-	s.mu.Unlock()
 }
 
 // SetName sets the name of this span. If this span is not being recorded than
 // this method does nothing.
 func (s *recordingSpan) SetName(name string) {
-	if !s.IsRecording() {
+	if s == nil {
 		return
 	}
 
 	s.mu.Lock()
 	defer s.mu.Unlock()
+	if !s.isRecording() {
+		return
+	}
 	s.name = name
 }
 
@@ -579,29 +620,26 @@ func (s *recordingSpan) Attributes() []attribute.KeyValue {
 func (s *recordingSpan) dedupeAttrs() {
 	// Do not set a capacity when creating this map. Benchmark testing has
 	// showed this to only add unused memory allocations in general use.
-	exists := make(map[attribute.Key]int)
-	s.dedupeAttrsFromRecord(&exists)
+	exists := make(map[attribute.Key]int, len(s.attributes))
+	s.dedupeAttrsFromRecord(exists)
 }
 
 // dedupeAttrsFromRecord deduplicates the attributes of s to fit capacity
 // using record as the record of unique attribute keys to their index.
 //
 // This method assumes s.mu.Lock is held by the caller.
-func (s *recordingSpan) dedupeAttrsFromRecord(record *map[attribute.Key]int) {
+func (s *recordingSpan) dedupeAttrsFromRecord(record map[attribute.Key]int) {
 	// Use the fact that slices share the same backing array.
 	unique := s.attributes[:0]
 	for _, a := range s.attributes {
-		if idx, ok := (*record)[a.Key]; ok {
+		if idx, ok := record[a.Key]; ok {
 			unique[idx] = a
 		} else {
 			unique = append(unique, a)
-			(*record)[a.Key] = len(unique) - 1
+			record[a.Key] = len(unique) - 1
 		}
 	}
-	// s.attributes have element types of attribute.KeyValue. These types are
-	// not pointers and they themselves do not contain pointer fields,
-	// therefore the duplicate values do not need to be zeroed for them to be
-	// garbage collected.
+	clear(s.attributes[len(unique):]) // Erase unneeded elements to let GC collect objects.
 	s.attributes = unique
 }
 
@@ -657,7 +695,7 @@ func (s *recordingSpan) Resource() *resource.Resource {
 }
 
 func (s *recordingSpan) AddLink(link trace.Link) {
-	if !s.IsRecording() {
+	if s == nil {
 		return
 	}
 	if !link.SpanContext.IsValid() && len(link.Attributes) == 0 &&
@@ -665,6 +703,12 @@ func (s *recordingSpan) AddLink(link trace.Link) {
 		return
 	}
 
+	s.mu.Lock()
+	defer s.mu.Unlock()
+	if !s.isRecording() {
+		return
+	}
+
 	l := Link{SpanContext: link.SpanContext, Attributes: link.Attributes}
 
 	// Discard attributes over limit.
@@ -678,9 +722,7 @@ func (s *recordingSpan) AddLink(link trace.Link) {
 		l.Attributes = l.Attributes[:limit]
 	}
 
-	s.mu.Lock()
 	s.links.add(l)
-	s.mu.Unlock()
 }
 
 // DroppedAttributes returns the number of attributes dropped by the span
@@ -755,12 +797,16 @@ func (s *recordingSpan) snapshot() ReadOnlySpan {
 }
 
 func (s *recordingSpan) addChild() {
-	if !s.IsRecording() {
+	if s == nil {
 		return
 	}
+
 	s.mu.Lock()
+	defer s.mu.Unlock()
+	if !s.isRecording() {
+		return
+	}
 	s.childSpanCount++
-	s.mu.Unlock()
 }
 
 func (*recordingSpan) private() {}
diff --git a/vendor/go.opentelemetry.io/otel/sdk/version.go b/vendor/go.opentelemetry.io/otel/sdk/version.go
index b7cede89..0b214d3f 100644
--- a/vendor/go.opentelemetry.io/otel/sdk/version.go
+++ b/vendor/go.opentelemetry.io/otel/sdk/version.go
@@ -5,5 +5,5 @@ package sdk // import "go.opentelemetry.io/otel/sdk"
 
 // Version is the current release version of the OpenTelemetry SDK in use.
 func Version() string {
-	return "1.29.0"
+	return "1.32.0"
 }
diff --git a/vendor/go.opentelemetry.io/otel/trace/context.go b/vendor/go.opentelemetry.io/otel/trace/context.go
index 5650a174..8c45a710 100644
--- a/vendor/go.opentelemetry.io/otel/trace/context.go
+++ b/vendor/go.opentelemetry.io/otel/trace/context.go
@@ -22,7 +22,7 @@ func ContextWithSpanContext(parent context.Context, sc SpanContext) context.Cont
 	return ContextWithSpan(parent, nonRecordingSpan{sc: sc})
 }
 
-// ContextWithRemoteSpanContext returns a copy of parent with rsc set explicly
+// ContextWithRemoteSpanContext returns a copy of parent with rsc set explicitly
 // as a remote SpanContext and as the current Span. The Span implementation
 // that wraps rsc is non-recording and performs no operations other than to
 // return rsc as the SpanContext from the SpanContext method.
diff --git a/vendor/go.opentelemetry.io/otel/trace/doc.go b/vendor/go.opentelemetry.io/otel/trace/doc.go
index d661c5d1..cdbf41d6 100644
--- a/vendor/go.opentelemetry.io/otel/trace/doc.go
+++ b/vendor/go.opentelemetry.io/otel/trace/doc.go
@@ -96,7 +96,7 @@ can embed the API interface directly.
 
 This option is not recommended. It will lead to publishing packages that
 contain runtime panics when users update to newer versions of
-[go.opentelemetry.io/otel/trace], which may be done with a trasitive
+[go.opentelemetry.io/otel/trace], which may be done with a transitive
 dependency.
 
 Finally, an author can embed another implementation in theirs. The embedded
diff --git a/vendor/go.opentelemetry.io/otel/verify_examples.sh b/vendor/go.opentelemetry.io/otel/verify_examples.sh
deleted file mode 100644
index e57bf57f..00000000
--- a/vendor/go.opentelemetry.io/otel/verify_examples.sh
+++ /dev/null
@@ -1,74 +0,0 @@
-#!/bin/bash
-
-# Copyright The OpenTelemetry Authors
-# SPDX-License-Identifier: Apache-2.0
-
-set -euo pipefail
-
-cd $(dirname $0)
-TOOLS_DIR=$(pwd)/.tools
-
-if [ -z "${GOPATH}" ] ; then
-	printf "GOPATH is not defined.\n"
-	exit -1
-fi
-
-if [ ! -d "${GOPATH}" ] ; then
-	printf "GOPATH ${GOPATH} is invalid \n"
-	exit -1
-fi
-
-# Pre-requisites
-if ! git diff --quiet; then \
-	git status
-	printf "\n\nError: working tree is not clean\n"
-	exit -1
-fi
-
-if [ "$(git tag --contains $(git log -1 --pretty=format:"%H"))" = "" ] ; then
-	printf "$(git log -1)"
-	printf "\n\nError: HEAD is not pointing to a tagged version"
-fi
-
-make ${TOOLS_DIR}/gojq
-
-DIR_TMP="${GOPATH}/src/oteltmp/"
-rm -rf $DIR_TMP
-mkdir -p $DIR_TMP
-
-printf "Copy examples to ${DIR_TMP}\n"
-cp -a ./example ${DIR_TMP}
-
-# Update go.mod files
-printf "Update go.mod: rename module and remove replace\n"
-
-PACKAGE_DIRS=$(find . -mindepth 2 -type f -name 'go.mod' -exec dirname {} \; | egrep 'example' | sed 's/^\.\///' | sort)
-
-for dir in $PACKAGE_DIRS; do
-	printf "  Update go.mod for $dir\n"
-	(cd "${DIR_TMP}/${dir}" && \
-	 # replaces is ("mod1" "mod2" …)
-	 replaces=($(go mod edit -json | ${TOOLS_DIR}/gojq '.Replace[].Old.Path')) && \
-	 # strip double quotes
-	 replaces=("${replaces[@]%\"}") && \
-	 replaces=("${replaces[@]#\"}") && \
-	 # make an array (-dropreplace=mod1 -dropreplace=mod2 …)
-	 dropreplaces=("${replaces[@]/#/-dropreplace=}") && \
-	 go mod edit -module "oteltmp/${dir}" "${dropreplaces[@]}" && \
-	 go mod tidy)
-done
-printf "Update done:\n\n"
-
-# Build directories that contain main package. These directories are different than
-# directories that contain go.mod files.
-printf "Build examples:\n"
-EXAMPLES=$(./get_main_pkgs.sh ./example)
-for ex in $EXAMPLES; do
-	printf "  Build $ex in ${DIR_TMP}/${ex}\n"
-	(cd "${DIR_TMP}/${ex}" && \
-	 go build .)
-done
-
-# Cleanup
-printf "Remove copied files.\n"
-rm -rf $DIR_TMP
diff --git a/vendor/go.opentelemetry.io/otel/version.go b/vendor/go.opentelemetry.io/otel/version.go
index f67039ed..59e24816 100644
--- a/vendor/go.opentelemetry.io/otel/version.go
+++ b/vendor/go.opentelemetry.io/otel/version.go
@@ -5,5 +5,5 @@ package otel // import "go.opentelemetry.io/otel"
 
 // Version is the current release version of OpenTelemetry in use.
 func Version() string {
-	return "1.29.0"
+	return "1.32.0"
 }
diff --git a/vendor/go.opentelemetry.io/otel/versions.yaml b/vendor/go.opentelemetry.io/otel/versions.yaml
index 3ba611d7..c04b12f6 100644
--- a/vendor/go.opentelemetry.io/otel/versions.yaml
+++ b/vendor/go.opentelemetry.io/otel/versions.yaml
@@ -3,19 +3,13 @@
 
 module-sets:
   stable-v1:
-    version: v1.29.0
+    version: v1.32.0
     modules:
       - go.opentelemetry.io/otel
       - go.opentelemetry.io/otel/bridge/opencensus
       - go.opentelemetry.io/otel/bridge/opencensus/test
       - go.opentelemetry.io/otel/bridge/opentracing
       - go.opentelemetry.io/otel/bridge/opentracing/test
-      - go.opentelemetry.io/otel/example/dice
-      - go.opentelemetry.io/otel/example/namedtracer
-      - go.opentelemetry.io/otel/example/opencensus
-      - go.opentelemetry.io/otel/example/otel-collector
-      - go.opentelemetry.io/otel/example/passthrough
-      - go.opentelemetry.io/otel/example/zipkin
       - go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc
       - go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp
       - go.opentelemetry.io/otel/exporters/otlp/otlptrace
@@ -29,12 +23,11 @@ module-sets:
       - go.opentelemetry.io/otel/sdk/metric
       - go.opentelemetry.io/otel/trace
   experimental-metrics:
-    version: v0.51.0
+    version: v0.54.0
     modules:
-      - go.opentelemetry.io/otel/example/prometheus
       - go.opentelemetry.io/otel/exporters/prometheus
   experimental-logs:
-    version: v0.5.0
+    version: v0.8.0
     modules:
       - go.opentelemetry.io/otel/log
       - go.opentelemetry.io/otel/sdk/log
@@ -42,7 +35,7 @@ module-sets:
       - go.opentelemetry.io/otel/exporters/otlp/otlplog/otlploghttp
       - go.opentelemetry.io/otel/exporters/stdout/stdoutlog
   experimental-schema:
-    version: v0.0.8
+    version: v0.0.11
     modules:
       - go.opentelemetry.io/otel/schema
 excluded-modules:
diff --git a/vendor/golang.org/x/crypto/argon2/blamka_amd64.s b/vendor/golang.org/x/crypto/argon2/blamka_amd64.s
index 6713acca..c3895478 100644
--- a/vendor/golang.org/x/crypto/argon2/blamka_amd64.s
+++ b/vendor/golang.org/x/crypto/argon2/blamka_amd64.s
@@ -1,243 +1,2791 @@
-// Copyright 2017 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
+// Code generated by command: go run blamka_amd64.go -out ../blamka_amd64.s -pkg argon2. DO NOT EDIT.
 
 //go:build amd64 && gc && !purego
 
 #include "textflag.h"
 
-DATA ·c40<>+0x00(SB)/8, $0x0201000706050403
-DATA ·c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b
-GLOBL ·c40<>(SB), (NOPTR+RODATA), $16
-
-DATA ·c48<>+0x00(SB)/8, $0x0100070605040302
-DATA ·c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a
-GLOBL ·c48<>(SB), (NOPTR+RODATA), $16
-
-#define SHUFFLE(v2, v3, v4, v5, v6, v7, t1, t2) \
-	MOVO       v4, t1; \
-	MOVO       v5, v4; \
-	MOVO       t1, v5; \
-	MOVO       v6, t1; \
-	PUNPCKLQDQ v6, t2; \
-	PUNPCKHQDQ v7, v6; \
-	PUNPCKHQDQ t2, v6; \
-	PUNPCKLQDQ v7, t2; \
-	MOVO       t1, v7; \
-	MOVO       v2, t1; \
-	PUNPCKHQDQ t2, v7; \
-	PUNPCKLQDQ v3, t2; \
-	PUNPCKHQDQ t2, v2; \
-	PUNPCKLQDQ t1, t2; \
-	PUNPCKHQDQ t2, v3
-
-#define SHUFFLE_INV(v2, v3, v4, v5, v6, v7, t1, t2) \
-	MOVO       v4, t1; \
-	MOVO       v5, v4; \
-	MOVO       t1, v5; \
-	MOVO       v2, t1; \
-	PUNPCKLQDQ v2, t2; \
-	PUNPCKHQDQ v3, v2; \
-	PUNPCKHQDQ t2, v2; \
-	PUNPCKLQDQ v3, t2; \
-	MOVO       t1, v3; \
-	MOVO       v6, t1; \
-	PUNPCKHQDQ t2, v3; \
-	PUNPCKLQDQ v7, t2; \
-	PUNPCKHQDQ t2, v6; \
-	PUNPCKLQDQ t1, t2; \
-	PUNPCKHQDQ t2, v7
-
-#define HALF_ROUND(v0, v1, v2, v3, v4, v5, v6, v7, t0, c40, c48) \
-	MOVO    v0, t0;        \
-	PMULULQ v2, t0;        \
-	PADDQ   v2, v0;        \
-	PADDQ   t0, v0;        \
-	PADDQ   t0, v0;        \
-	PXOR    v0, v6;        \
-	PSHUFD  $0xB1, v6, v6; \
-	MOVO    v4, t0;        \
-	PMULULQ v6, t0;        \
-	PADDQ   v6, v4;        \
-	PADDQ   t0, v4;        \
-	PADDQ   t0, v4;        \
-	PXOR    v4, v2;        \
-	PSHUFB  c40, v2;       \
-	MOVO    v0, t0;        \
-	PMULULQ v2, t0;        \
-	PADDQ   v2, v0;        \
-	PADDQ   t0, v0;        \
-	PADDQ   t0, v0;        \
-	PXOR    v0, v6;        \
-	PSHUFB  c48, v6;       \
-	MOVO    v4, t0;        \
-	PMULULQ v6, t0;        \
-	PADDQ   v6, v4;        \
-	PADDQ   t0, v4;        \
-	PADDQ   t0, v4;        \
-	PXOR    v4, v2;        \
-	MOVO    v2, t0;        \
-	PADDQ   v2, t0;        \
-	PSRLQ   $63, v2;       \
-	PXOR    t0, v2;        \
-	MOVO    v1, t0;        \
-	PMULULQ v3, t0;        \
-	PADDQ   v3, v1;        \
-	PADDQ   t0, v1;        \
-	PADDQ   t0, v1;        \
-	PXOR    v1, v7;        \
-	PSHUFD  $0xB1, v7, v7; \
-	MOVO    v5, t0;        \
-	PMULULQ v7, t0;        \
-	PADDQ   v7, v5;        \
-	PADDQ   t0, v5;        \
-	PADDQ   t0, v5;        \
-	PXOR    v5, v3;        \
-	PSHUFB  c40, v3;       \
-	MOVO    v1, t0;        \
-	PMULULQ v3, t0;        \
-	PADDQ   v3, v1;        \
-	PADDQ   t0, v1;        \
-	PADDQ   t0, v1;        \
-	PXOR    v1, v7;        \
-	PSHUFB  c48, v7;       \
-	MOVO    v5, t0;        \
-	PMULULQ v7, t0;        \
-	PADDQ   v7, v5;        \
-	PADDQ   t0, v5;        \
-	PADDQ   t0, v5;        \
-	PXOR    v5, v3;        \
-	MOVO    v3, t0;        \
-	PADDQ   v3, t0;        \
-	PSRLQ   $63, v3;       \
-	PXOR    t0, v3
-
-#define LOAD_MSG_0(block, off) \
-	MOVOU 8*(off+0)(block), X0;  \
-	MOVOU 8*(off+2)(block), X1;  \
-	MOVOU 8*(off+4)(block), X2;  \
-	MOVOU 8*(off+6)(block), X3;  \
-	MOVOU 8*(off+8)(block), X4;  \
-	MOVOU 8*(off+10)(block), X5; \
-	MOVOU 8*(off+12)(block), X6; \
-	MOVOU 8*(off+14)(block), X7
-
-#define STORE_MSG_0(block, off) \
-	MOVOU X0, 8*(off+0)(block);  \
-	MOVOU X1, 8*(off+2)(block);  \
-	MOVOU X2, 8*(off+4)(block);  \
-	MOVOU X3, 8*(off+6)(block);  \
-	MOVOU X4, 8*(off+8)(block);  \
-	MOVOU X5, 8*(off+10)(block); \
-	MOVOU X6, 8*(off+12)(block); \
-	MOVOU X7, 8*(off+14)(block)
-
-#define LOAD_MSG_1(block, off) \
-	MOVOU 8*off+0*8(block), X0;  \
-	MOVOU 8*off+16*8(block), X1; \
-	MOVOU 8*off+32*8(block), X2; \
-	MOVOU 8*off+48*8(block), X3; \
-	MOVOU 8*off+64*8(block), X4; \
-	MOVOU 8*off+80*8(block), X5; \
-	MOVOU 8*off+96*8(block), X6; \
-	MOVOU 8*off+112*8(block), X7
-
-#define STORE_MSG_1(block, off) \
-	MOVOU X0, 8*off+0*8(block);  \
-	MOVOU X1, 8*off+16*8(block); \
-	MOVOU X2, 8*off+32*8(block); \
-	MOVOU X3, 8*off+48*8(block); \
-	MOVOU X4, 8*off+64*8(block); \
-	MOVOU X5, 8*off+80*8(block); \
-	MOVOU X6, 8*off+96*8(block); \
-	MOVOU X7, 8*off+112*8(block)
-
-#define BLAMKA_ROUND_0(block, off, t0, t1, c40, c48) \
-	LOAD_MSG_0(block, off);                                   \
-	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, t0, c40, c48); \
-	SHUFFLE(X2, X3, X4, X5, X6, X7, t0, t1);                  \
-	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, t0, c40, c48); \
-	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, t0, t1);              \
-	STORE_MSG_0(block, off)
-
-#define BLAMKA_ROUND_1(block, off, t0, t1, c40, c48) \
-	LOAD_MSG_1(block, off);                                   \
-	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, t0, c40, c48); \
-	SHUFFLE(X2, X3, X4, X5, X6, X7, t0, t1);                  \
-	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, t0, c40, c48); \
-	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, t0, t1);              \
-	STORE_MSG_1(block, off)
-
 // func blamkaSSE4(b *block)
-TEXT ·blamkaSSE4(SB), 4, $0-8
-	MOVQ b+0(FP), AX
-
-	MOVOU ·c40<>(SB), X10
-	MOVOU ·c48<>(SB), X11
+// Requires: SSE2, SSSE3
+TEXT ·blamkaSSE4(SB), NOSPLIT, $0-8
+	MOVQ       b+0(FP), AX
+	MOVOU      ·c40<>+0(SB), X10
+	MOVOU      ·c48<>+0(SB), X11
+	MOVOU      (AX), X0
+	MOVOU      16(AX), X1
+	MOVOU      32(AX), X2
+	MOVOU      48(AX), X3
+	MOVOU      64(AX), X4
+	MOVOU      80(AX), X5
+	MOVOU      96(AX), X6
+	MOVOU      112(AX), X7
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFD     $0xb1, X6, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	PSHUFB     X10, X2
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFB     X11, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	MOVO       X2, X8
+	PADDQ      X2, X8
+	PSRLQ      $0x3f, X2
+	PXOR       X8, X2
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X7, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	PSHUFB     X10, X3
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFB     X11, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	MOVO       X3, X8
+	PADDQ      X3, X8
+	PSRLQ      $0x3f, X3
+	PXOR       X8, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X6, X8
+	PUNPCKLQDQ X6, X9
+	PUNPCKHQDQ X7, X6
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X7, X9
+	MOVO       X8, X7
+	MOVO       X2, X8
+	PUNPCKHQDQ X9, X7
+	PUNPCKLQDQ X3, X9
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X3
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFD     $0xb1, X6, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	PSHUFB     X10, X2
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFB     X11, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	MOVO       X2, X8
+	PADDQ      X2, X8
+	PSRLQ      $0x3f, X2
+	PXOR       X8, X2
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X7, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	PSHUFB     X10, X3
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFB     X11, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	MOVO       X3, X8
+	PADDQ      X3, X8
+	PSRLQ      $0x3f, X3
+	PXOR       X8, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X2, X8
+	PUNPCKLQDQ X2, X9
+	PUNPCKHQDQ X3, X2
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X3, X9
+	MOVO       X8, X3
+	MOVO       X6, X8
+	PUNPCKHQDQ X9, X3
+	PUNPCKLQDQ X7, X9
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X7
+	MOVOU      X0, (AX)
+	MOVOU      X1, 16(AX)
+	MOVOU      X2, 32(AX)
+	MOVOU      X3, 48(AX)
+	MOVOU      X4, 64(AX)
+	MOVOU      X5, 80(AX)
+	MOVOU      X6, 96(AX)
+	MOVOU      X7, 112(AX)
+	MOVOU      128(AX), X0
+	MOVOU      144(AX), X1
+	MOVOU      160(AX), X2
+	MOVOU      176(AX), X3
+	MOVOU      192(AX), X4
+	MOVOU      208(AX), X5
+	MOVOU      224(AX), X6
+	MOVOU      240(AX), X7
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFD     $0xb1, X6, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	PSHUFB     X10, X2
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFB     X11, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	MOVO       X2, X8
+	PADDQ      X2, X8
+	PSRLQ      $0x3f, X2
+	PXOR       X8, X2
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X7, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	PSHUFB     X10, X3
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFB     X11, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	MOVO       X3, X8
+	PADDQ      X3, X8
+	PSRLQ      $0x3f, X3
+	PXOR       X8, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X6, X8
+	PUNPCKLQDQ X6, X9
+	PUNPCKHQDQ X7, X6
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X7, X9
+	MOVO       X8, X7
+	MOVO       X2, X8
+	PUNPCKHQDQ X9, X7
+	PUNPCKLQDQ X3, X9
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X3
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFD     $0xb1, X6, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	PSHUFB     X10, X2
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFB     X11, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	MOVO       X2, X8
+	PADDQ      X2, X8
+	PSRLQ      $0x3f, X2
+	PXOR       X8, X2
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X7, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	PSHUFB     X10, X3
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFB     X11, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	MOVO       X3, X8
+	PADDQ      X3, X8
+	PSRLQ      $0x3f, X3
+	PXOR       X8, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X2, X8
+	PUNPCKLQDQ X2, X9
+	PUNPCKHQDQ X3, X2
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X3, X9
+	MOVO       X8, X3
+	MOVO       X6, X8
+	PUNPCKHQDQ X9, X3
+	PUNPCKLQDQ X7, X9
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X7
+	MOVOU      X0, 128(AX)
+	MOVOU      X1, 144(AX)
+	MOVOU      X2, 160(AX)
+	MOVOU      X3, 176(AX)
+	MOVOU      X4, 192(AX)
+	MOVOU      X5, 208(AX)
+	MOVOU      X6, 224(AX)
+	MOVOU      X7, 240(AX)
+	MOVOU      256(AX), X0
+	MOVOU      272(AX), X1
+	MOVOU      288(AX), X2
+	MOVOU      304(AX), X3
+	MOVOU      320(AX), X4
+	MOVOU      336(AX), X5
+	MOVOU      352(AX), X6
+	MOVOU      368(AX), X7
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFD     $0xb1, X6, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	PSHUFB     X10, X2
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFB     X11, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	MOVO       X2, X8
+	PADDQ      X2, X8
+	PSRLQ      $0x3f, X2
+	PXOR       X8, X2
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X7, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	PSHUFB     X10, X3
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFB     X11, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	MOVO       X3, X8
+	PADDQ      X3, X8
+	PSRLQ      $0x3f, X3
+	PXOR       X8, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X6, X8
+	PUNPCKLQDQ X6, X9
+	PUNPCKHQDQ X7, X6
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X7, X9
+	MOVO       X8, X7
+	MOVO       X2, X8
+	PUNPCKHQDQ X9, X7
+	PUNPCKLQDQ X3, X9
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X3
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFD     $0xb1, X6, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	PSHUFB     X10, X2
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFB     X11, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	MOVO       X2, X8
+	PADDQ      X2, X8
+	PSRLQ      $0x3f, X2
+	PXOR       X8, X2
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X7, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	PSHUFB     X10, X3
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFB     X11, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	MOVO       X3, X8
+	PADDQ      X3, X8
+	PSRLQ      $0x3f, X3
+	PXOR       X8, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X2, X8
+	PUNPCKLQDQ X2, X9
+	PUNPCKHQDQ X3, X2
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X3, X9
+	MOVO       X8, X3
+	MOVO       X6, X8
+	PUNPCKHQDQ X9, X3
+	PUNPCKLQDQ X7, X9
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X7
+	MOVOU      X0, 256(AX)
+	MOVOU      X1, 272(AX)
+	MOVOU      X2, 288(AX)
+	MOVOU      X3, 304(AX)
+	MOVOU      X4, 320(AX)
+	MOVOU      X5, 336(AX)
+	MOVOU      X6, 352(AX)
+	MOVOU      X7, 368(AX)
+	MOVOU      384(AX), X0
+	MOVOU      400(AX), X1
+	MOVOU      416(AX), X2
+	MOVOU      432(AX), X3
+	MOVOU      448(AX), X4
+	MOVOU      464(AX), X5
+	MOVOU      480(AX), X6
+	MOVOU      496(AX), X7
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFD     $0xb1, X6, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	PSHUFB     X10, X2
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFB     X11, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	MOVO       X2, X8
+	PADDQ      X2, X8
+	PSRLQ      $0x3f, X2
+	PXOR       X8, X2
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X7, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	PSHUFB     X10, X3
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFB     X11, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	MOVO       X3, X8
+	PADDQ      X3, X8
+	PSRLQ      $0x3f, X3
+	PXOR       X8, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X6, X8
+	PUNPCKLQDQ X6, X9
+	PUNPCKHQDQ X7, X6
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X7, X9
+	MOVO       X8, X7
+	MOVO       X2, X8
+	PUNPCKHQDQ X9, X7
+	PUNPCKLQDQ X3, X9
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X3
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFD     $0xb1, X6, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	PSHUFB     X10, X2
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFB     X11, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	MOVO       X2, X8
+	PADDQ      X2, X8
+	PSRLQ      $0x3f, X2
+	PXOR       X8, X2
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X7, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	PSHUFB     X10, X3
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFB     X11, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	MOVO       X3, X8
+	PADDQ      X3, X8
+	PSRLQ      $0x3f, X3
+	PXOR       X8, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X2, X8
+	PUNPCKLQDQ X2, X9
+	PUNPCKHQDQ X3, X2
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X3, X9
+	MOVO       X8, X3
+	MOVO       X6, X8
+	PUNPCKHQDQ X9, X3
+	PUNPCKLQDQ X7, X9
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X7
+	MOVOU      X0, 384(AX)
+	MOVOU      X1, 400(AX)
+	MOVOU      X2, 416(AX)
+	MOVOU      X3, 432(AX)
+	MOVOU      X4, 448(AX)
+	MOVOU      X5, 464(AX)
+	MOVOU      X6, 480(AX)
+	MOVOU      X7, 496(AX)
+	MOVOU      512(AX), X0
+	MOVOU      528(AX), X1
+	MOVOU      544(AX), X2
+	MOVOU      560(AX), X3
+	MOVOU      576(AX), X4
+	MOVOU      592(AX), X5
+	MOVOU      608(AX), X6
+	MOVOU      624(AX), X7
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFD     $0xb1, X6, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	PSHUFB     X10, X2
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFB     X11, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	MOVO       X2, X8
+	PADDQ      X2, X8
+	PSRLQ      $0x3f, X2
+	PXOR       X8, X2
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X7, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	PSHUFB     X10, X3
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFB     X11, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	MOVO       X3, X8
+	PADDQ      X3, X8
+	PSRLQ      $0x3f, X3
+	PXOR       X8, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X6, X8
+	PUNPCKLQDQ X6, X9
+	PUNPCKHQDQ X7, X6
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X7, X9
+	MOVO       X8, X7
+	MOVO       X2, X8
+	PUNPCKHQDQ X9, X7
+	PUNPCKLQDQ X3, X9
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X3
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFD     $0xb1, X6, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	PSHUFB     X10, X2
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFB     X11, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	MOVO       X2, X8
+	PADDQ      X2, X8
+	PSRLQ      $0x3f, X2
+	PXOR       X8, X2
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X7, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	PSHUFB     X10, X3
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFB     X11, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	MOVO       X3, X8
+	PADDQ      X3, X8
+	PSRLQ      $0x3f, X3
+	PXOR       X8, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X2, X8
+	PUNPCKLQDQ X2, X9
+	PUNPCKHQDQ X3, X2
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X3, X9
+	MOVO       X8, X3
+	MOVO       X6, X8
+	PUNPCKHQDQ X9, X3
+	PUNPCKLQDQ X7, X9
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X7
+	MOVOU      X0, 512(AX)
+	MOVOU      X1, 528(AX)
+	MOVOU      X2, 544(AX)
+	MOVOU      X3, 560(AX)
+	MOVOU      X4, 576(AX)
+	MOVOU      X5, 592(AX)
+	MOVOU      X6, 608(AX)
+	MOVOU      X7, 624(AX)
+	MOVOU      640(AX), X0
+	MOVOU      656(AX), X1
+	MOVOU      672(AX), X2
+	MOVOU      688(AX), X3
+	MOVOU      704(AX), X4
+	MOVOU      720(AX), X5
+	MOVOU      736(AX), X6
+	MOVOU      752(AX), X7
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFD     $0xb1, X6, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	PSHUFB     X10, X2
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFB     X11, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	MOVO       X2, X8
+	PADDQ      X2, X8
+	PSRLQ      $0x3f, X2
+	PXOR       X8, X2
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X7, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	PSHUFB     X10, X3
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFB     X11, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	MOVO       X3, X8
+	PADDQ      X3, X8
+	PSRLQ      $0x3f, X3
+	PXOR       X8, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X6, X8
+	PUNPCKLQDQ X6, X9
+	PUNPCKHQDQ X7, X6
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X7, X9
+	MOVO       X8, X7
+	MOVO       X2, X8
+	PUNPCKHQDQ X9, X7
+	PUNPCKLQDQ X3, X9
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X3
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFD     $0xb1, X6, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	PSHUFB     X10, X2
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFB     X11, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	MOVO       X2, X8
+	PADDQ      X2, X8
+	PSRLQ      $0x3f, X2
+	PXOR       X8, X2
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X7, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	PSHUFB     X10, X3
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFB     X11, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	MOVO       X3, X8
+	PADDQ      X3, X8
+	PSRLQ      $0x3f, X3
+	PXOR       X8, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X2, X8
+	PUNPCKLQDQ X2, X9
+	PUNPCKHQDQ X3, X2
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X3, X9
+	MOVO       X8, X3
+	MOVO       X6, X8
+	PUNPCKHQDQ X9, X3
+	PUNPCKLQDQ X7, X9
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X7
+	MOVOU      X0, 640(AX)
+	MOVOU      X1, 656(AX)
+	MOVOU      X2, 672(AX)
+	MOVOU      X3, 688(AX)
+	MOVOU      X4, 704(AX)
+	MOVOU      X5, 720(AX)
+	MOVOU      X6, 736(AX)
+	MOVOU      X7, 752(AX)
+	MOVOU      768(AX), X0
+	MOVOU      784(AX), X1
+	MOVOU      800(AX), X2
+	MOVOU      816(AX), X3
+	MOVOU      832(AX), X4
+	MOVOU      848(AX), X5
+	MOVOU      864(AX), X6
+	MOVOU      880(AX), X7
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFD     $0xb1, X6, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	PSHUFB     X10, X2
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFB     X11, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	MOVO       X2, X8
+	PADDQ      X2, X8
+	PSRLQ      $0x3f, X2
+	PXOR       X8, X2
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X7, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	PSHUFB     X10, X3
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFB     X11, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	MOVO       X3, X8
+	PADDQ      X3, X8
+	PSRLQ      $0x3f, X3
+	PXOR       X8, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X6, X8
+	PUNPCKLQDQ X6, X9
+	PUNPCKHQDQ X7, X6
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X7, X9
+	MOVO       X8, X7
+	MOVO       X2, X8
+	PUNPCKHQDQ X9, X7
+	PUNPCKLQDQ X3, X9
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X3
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFD     $0xb1, X6, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	PSHUFB     X10, X2
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFB     X11, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	MOVO       X2, X8
+	PADDQ      X2, X8
+	PSRLQ      $0x3f, X2
+	PXOR       X8, X2
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X7, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	PSHUFB     X10, X3
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFB     X11, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	MOVO       X3, X8
+	PADDQ      X3, X8
+	PSRLQ      $0x3f, X3
+	PXOR       X8, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X2, X8
+	PUNPCKLQDQ X2, X9
+	PUNPCKHQDQ X3, X2
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X3, X9
+	MOVO       X8, X3
+	MOVO       X6, X8
+	PUNPCKHQDQ X9, X3
+	PUNPCKLQDQ X7, X9
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X7
+	MOVOU      X0, 768(AX)
+	MOVOU      X1, 784(AX)
+	MOVOU      X2, 800(AX)
+	MOVOU      X3, 816(AX)
+	MOVOU      X4, 832(AX)
+	MOVOU      X5, 848(AX)
+	MOVOU      X6, 864(AX)
+	MOVOU      X7, 880(AX)
+	MOVOU      896(AX), X0
+	MOVOU      912(AX), X1
+	MOVOU      928(AX), X2
+	MOVOU      944(AX), X3
+	MOVOU      960(AX), X4
+	MOVOU      976(AX), X5
+	MOVOU      992(AX), X6
+	MOVOU      1008(AX), X7
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFD     $0xb1, X6, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	PSHUFB     X10, X2
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFB     X11, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	MOVO       X2, X8
+	PADDQ      X2, X8
+	PSRLQ      $0x3f, X2
+	PXOR       X8, X2
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X7, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	PSHUFB     X10, X3
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFB     X11, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	MOVO       X3, X8
+	PADDQ      X3, X8
+	PSRLQ      $0x3f, X3
+	PXOR       X8, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X6, X8
+	PUNPCKLQDQ X6, X9
+	PUNPCKHQDQ X7, X6
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X7, X9
+	MOVO       X8, X7
+	MOVO       X2, X8
+	PUNPCKHQDQ X9, X7
+	PUNPCKLQDQ X3, X9
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X3
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFD     $0xb1, X6, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	PSHUFB     X10, X2
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFB     X11, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	MOVO       X2, X8
+	PADDQ      X2, X8
+	PSRLQ      $0x3f, X2
+	PXOR       X8, X2
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X7, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	PSHUFB     X10, X3
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFB     X11, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	MOVO       X3, X8
+	PADDQ      X3, X8
+	PSRLQ      $0x3f, X3
+	PXOR       X8, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X2, X8
+	PUNPCKLQDQ X2, X9
+	PUNPCKHQDQ X3, X2
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X3, X9
+	MOVO       X8, X3
+	MOVO       X6, X8
+	PUNPCKHQDQ X9, X3
+	PUNPCKLQDQ X7, X9
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X7
+	MOVOU      X0, 896(AX)
+	MOVOU      X1, 912(AX)
+	MOVOU      X2, 928(AX)
+	MOVOU      X3, 944(AX)
+	MOVOU      X4, 960(AX)
+	MOVOU      X5, 976(AX)
+	MOVOU      X6, 992(AX)
+	MOVOU      X7, 1008(AX)
+	MOVOU      (AX), X0
+	MOVOU      128(AX), X1
+	MOVOU      256(AX), X2
+	MOVOU      384(AX), X3
+	MOVOU      512(AX), X4
+	MOVOU      640(AX), X5
+	MOVOU      768(AX), X6
+	MOVOU      896(AX), X7
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFD     $0xb1, X6, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	PSHUFB     X10, X2
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFB     X11, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	MOVO       X2, X8
+	PADDQ      X2, X8
+	PSRLQ      $0x3f, X2
+	PXOR       X8, X2
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X7, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	PSHUFB     X10, X3
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFB     X11, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	MOVO       X3, X8
+	PADDQ      X3, X8
+	PSRLQ      $0x3f, X3
+	PXOR       X8, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X6, X8
+	PUNPCKLQDQ X6, X9
+	PUNPCKHQDQ X7, X6
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X7, X9
+	MOVO       X8, X7
+	MOVO       X2, X8
+	PUNPCKHQDQ X9, X7
+	PUNPCKLQDQ X3, X9
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X3
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFD     $0xb1, X6, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	PSHUFB     X10, X2
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFB     X11, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	MOVO       X2, X8
+	PADDQ      X2, X8
+	PSRLQ      $0x3f, X2
+	PXOR       X8, X2
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X7, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	PSHUFB     X10, X3
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFB     X11, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	MOVO       X3, X8
+	PADDQ      X3, X8
+	PSRLQ      $0x3f, X3
+	PXOR       X8, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X2, X8
+	PUNPCKLQDQ X2, X9
+	PUNPCKHQDQ X3, X2
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X3, X9
+	MOVO       X8, X3
+	MOVO       X6, X8
+	PUNPCKHQDQ X9, X3
+	PUNPCKLQDQ X7, X9
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X7
+	MOVOU      X0, (AX)
+	MOVOU      X1, 128(AX)
+	MOVOU      X2, 256(AX)
+	MOVOU      X3, 384(AX)
+	MOVOU      X4, 512(AX)
+	MOVOU      X5, 640(AX)
+	MOVOU      X6, 768(AX)
+	MOVOU      X7, 896(AX)
+	MOVOU      16(AX), X0
+	MOVOU      144(AX), X1
+	MOVOU      272(AX), X2
+	MOVOU      400(AX), X3
+	MOVOU      528(AX), X4
+	MOVOU      656(AX), X5
+	MOVOU      784(AX), X6
+	MOVOU      912(AX), X7
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFD     $0xb1, X6, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	PSHUFB     X10, X2
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFB     X11, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	MOVO       X2, X8
+	PADDQ      X2, X8
+	PSRLQ      $0x3f, X2
+	PXOR       X8, X2
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X7, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	PSHUFB     X10, X3
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFB     X11, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	MOVO       X3, X8
+	PADDQ      X3, X8
+	PSRLQ      $0x3f, X3
+	PXOR       X8, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X6, X8
+	PUNPCKLQDQ X6, X9
+	PUNPCKHQDQ X7, X6
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X7, X9
+	MOVO       X8, X7
+	MOVO       X2, X8
+	PUNPCKHQDQ X9, X7
+	PUNPCKLQDQ X3, X9
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X3
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFD     $0xb1, X6, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	PSHUFB     X10, X2
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFB     X11, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	MOVO       X2, X8
+	PADDQ      X2, X8
+	PSRLQ      $0x3f, X2
+	PXOR       X8, X2
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X7, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	PSHUFB     X10, X3
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFB     X11, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	MOVO       X3, X8
+	PADDQ      X3, X8
+	PSRLQ      $0x3f, X3
+	PXOR       X8, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X2, X8
+	PUNPCKLQDQ X2, X9
+	PUNPCKHQDQ X3, X2
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X3, X9
+	MOVO       X8, X3
+	MOVO       X6, X8
+	PUNPCKHQDQ X9, X3
+	PUNPCKLQDQ X7, X9
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X7
+	MOVOU      X0, 16(AX)
+	MOVOU      X1, 144(AX)
+	MOVOU      X2, 272(AX)
+	MOVOU      X3, 400(AX)
+	MOVOU      X4, 528(AX)
+	MOVOU      X5, 656(AX)
+	MOVOU      X6, 784(AX)
+	MOVOU      X7, 912(AX)
+	MOVOU      32(AX), X0
+	MOVOU      160(AX), X1
+	MOVOU      288(AX), X2
+	MOVOU      416(AX), X3
+	MOVOU      544(AX), X4
+	MOVOU      672(AX), X5
+	MOVOU      800(AX), X6
+	MOVOU      928(AX), X7
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFD     $0xb1, X6, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	PSHUFB     X10, X2
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFB     X11, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	MOVO       X2, X8
+	PADDQ      X2, X8
+	PSRLQ      $0x3f, X2
+	PXOR       X8, X2
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X7, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	PSHUFB     X10, X3
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFB     X11, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	MOVO       X3, X8
+	PADDQ      X3, X8
+	PSRLQ      $0x3f, X3
+	PXOR       X8, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X6, X8
+	PUNPCKLQDQ X6, X9
+	PUNPCKHQDQ X7, X6
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X7, X9
+	MOVO       X8, X7
+	MOVO       X2, X8
+	PUNPCKHQDQ X9, X7
+	PUNPCKLQDQ X3, X9
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X3
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFD     $0xb1, X6, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	PSHUFB     X10, X2
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFB     X11, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	MOVO       X2, X8
+	PADDQ      X2, X8
+	PSRLQ      $0x3f, X2
+	PXOR       X8, X2
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X7, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	PSHUFB     X10, X3
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFB     X11, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	MOVO       X3, X8
+	PADDQ      X3, X8
+	PSRLQ      $0x3f, X3
+	PXOR       X8, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X2, X8
+	PUNPCKLQDQ X2, X9
+	PUNPCKHQDQ X3, X2
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X3, X9
+	MOVO       X8, X3
+	MOVO       X6, X8
+	PUNPCKHQDQ X9, X3
+	PUNPCKLQDQ X7, X9
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X7
+	MOVOU      X0, 32(AX)
+	MOVOU      X1, 160(AX)
+	MOVOU      X2, 288(AX)
+	MOVOU      X3, 416(AX)
+	MOVOU      X4, 544(AX)
+	MOVOU      X5, 672(AX)
+	MOVOU      X6, 800(AX)
+	MOVOU      X7, 928(AX)
+	MOVOU      48(AX), X0
+	MOVOU      176(AX), X1
+	MOVOU      304(AX), X2
+	MOVOU      432(AX), X3
+	MOVOU      560(AX), X4
+	MOVOU      688(AX), X5
+	MOVOU      816(AX), X6
+	MOVOU      944(AX), X7
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFD     $0xb1, X6, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	PSHUFB     X10, X2
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFB     X11, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	MOVO       X2, X8
+	PADDQ      X2, X8
+	PSRLQ      $0x3f, X2
+	PXOR       X8, X2
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X7, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	PSHUFB     X10, X3
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFB     X11, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	MOVO       X3, X8
+	PADDQ      X3, X8
+	PSRLQ      $0x3f, X3
+	PXOR       X8, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X6, X8
+	PUNPCKLQDQ X6, X9
+	PUNPCKHQDQ X7, X6
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X7, X9
+	MOVO       X8, X7
+	MOVO       X2, X8
+	PUNPCKHQDQ X9, X7
+	PUNPCKLQDQ X3, X9
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X3
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFD     $0xb1, X6, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	PSHUFB     X10, X2
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFB     X11, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	MOVO       X2, X8
+	PADDQ      X2, X8
+	PSRLQ      $0x3f, X2
+	PXOR       X8, X2
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X7, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	PSHUFB     X10, X3
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFB     X11, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	MOVO       X3, X8
+	PADDQ      X3, X8
+	PSRLQ      $0x3f, X3
+	PXOR       X8, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X2, X8
+	PUNPCKLQDQ X2, X9
+	PUNPCKHQDQ X3, X2
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X3, X9
+	MOVO       X8, X3
+	MOVO       X6, X8
+	PUNPCKHQDQ X9, X3
+	PUNPCKLQDQ X7, X9
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X7
+	MOVOU      X0, 48(AX)
+	MOVOU      X1, 176(AX)
+	MOVOU      X2, 304(AX)
+	MOVOU      X3, 432(AX)
+	MOVOU      X4, 560(AX)
+	MOVOU      X5, 688(AX)
+	MOVOU      X6, 816(AX)
+	MOVOU      X7, 944(AX)
+	MOVOU      64(AX), X0
+	MOVOU      192(AX), X1
+	MOVOU      320(AX), X2
+	MOVOU      448(AX), X3
+	MOVOU      576(AX), X4
+	MOVOU      704(AX), X5
+	MOVOU      832(AX), X6
+	MOVOU      960(AX), X7
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFD     $0xb1, X6, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	PSHUFB     X10, X2
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFB     X11, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	MOVO       X2, X8
+	PADDQ      X2, X8
+	PSRLQ      $0x3f, X2
+	PXOR       X8, X2
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X7, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	PSHUFB     X10, X3
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFB     X11, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	MOVO       X3, X8
+	PADDQ      X3, X8
+	PSRLQ      $0x3f, X3
+	PXOR       X8, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X6, X8
+	PUNPCKLQDQ X6, X9
+	PUNPCKHQDQ X7, X6
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X7, X9
+	MOVO       X8, X7
+	MOVO       X2, X8
+	PUNPCKHQDQ X9, X7
+	PUNPCKLQDQ X3, X9
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X3
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFD     $0xb1, X6, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	PSHUFB     X10, X2
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFB     X11, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	MOVO       X2, X8
+	PADDQ      X2, X8
+	PSRLQ      $0x3f, X2
+	PXOR       X8, X2
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X7, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	PSHUFB     X10, X3
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFB     X11, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	MOVO       X3, X8
+	PADDQ      X3, X8
+	PSRLQ      $0x3f, X3
+	PXOR       X8, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X2, X8
+	PUNPCKLQDQ X2, X9
+	PUNPCKHQDQ X3, X2
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X3, X9
+	MOVO       X8, X3
+	MOVO       X6, X8
+	PUNPCKHQDQ X9, X3
+	PUNPCKLQDQ X7, X9
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X7
+	MOVOU      X0, 64(AX)
+	MOVOU      X1, 192(AX)
+	MOVOU      X2, 320(AX)
+	MOVOU      X3, 448(AX)
+	MOVOU      X4, 576(AX)
+	MOVOU      X5, 704(AX)
+	MOVOU      X6, 832(AX)
+	MOVOU      X7, 960(AX)
+	MOVOU      80(AX), X0
+	MOVOU      208(AX), X1
+	MOVOU      336(AX), X2
+	MOVOU      464(AX), X3
+	MOVOU      592(AX), X4
+	MOVOU      720(AX), X5
+	MOVOU      848(AX), X6
+	MOVOU      976(AX), X7
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFD     $0xb1, X6, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	PSHUFB     X10, X2
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFB     X11, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	MOVO       X2, X8
+	PADDQ      X2, X8
+	PSRLQ      $0x3f, X2
+	PXOR       X8, X2
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X7, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	PSHUFB     X10, X3
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFB     X11, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	MOVO       X3, X8
+	PADDQ      X3, X8
+	PSRLQ      $0x3f, X3
+	PXOR       X8, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X6, X8
+	PUNPCKLQDQ X6, X9
+	PUNPCKHQDQ X7, X6
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X7, X9
+	MOVO       X8, X7
+	MOVO       X2, X8
+	PUNPCKHQDQ X9, X7
+	PUNPCKLQDQ X3, X9
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X3
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFD     $0xb1, X6, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	PSHUFB     X10, X2
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFB     X11, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	MOVO       X2, X8
+	PADDQ      X2, X8
+	PSRLQ      $0x3f, X2
+	PXOR       X8, X2
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X7, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	PSHUFB     X10, X3
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFB     X11, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	MOVO       X3, X8
+	PADDQ      X3, X8
+	PSRLQ      $0x3f, X3
+	PXOR       X8, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X2, X8
+	PUNPCKLQDQ X2, X9
+	PUNPCKHQDQ X3, X2
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X3, X9
+	MOVO       X8, X3
+	MOVO       X6, X8
+	PUNPCKHQDQ X9, X3
+	PUNPCKLQDQ X7, X9
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X7
+	MOVOU      X0, 80(AX)
+	MOVOU      X1, 208(AX)
+	MOVOU      X2, 336(AX)
+	MOVOU      X3, 464(AX)
+	MOVOU      X4, 592(AX)
+	MOVOU      X5, 720(AX)
+	MOVOU      X6, 848(AX)
+	MOVOU      X7, 976(AX)
+	MOVOU      96(AX), X0
+	MOVOU      224(AX), X1
+	MOVOU      352(AX), X2
+	MOVOU      480(AX), X3
+	MOVOU      608(AX), X4
+	MOVOU      736(AX), X5
+	MOVOU      864(AX), X6
+	MOVOU      992(AX), X7
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFD     $0xb1, X6, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	PSHUFB     X10, X2
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFB     X11, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	MOVO       X2, X8
+	PADDQ      X2, X8
+	PSRLQ      $0x3f, X2
+	PXOR       X8, X2
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X7, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	PSHUFB     X10, X3
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFB     X11, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	MOVO       X3, X8
+	PADDQ      X3, X8
+	PSRLQ      $0x3f, X3
+	PXOR       X8, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X6, X8
+	PUNPCKLQDQ X6, X9
+	PUNPCKHQDQ X7, X6
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X7, X9
+	MOVO       X8, X7
+	MOVO       X2, X8
+	PUNPCKHQDQ X9, X7
+	PUNPCKLQDQ X3, X9
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X3
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFD     $0xb1, X6, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	PSHUFB     X10, X2
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFB     X11, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	MOVO       X2, X8
+	PADDQ      X2, X8
+	PSRLQ      $0x3f, X2
+	PXOR       X8, X2
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X7, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	PSHUFB     X10, X3
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFB     X11, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	MOVO       X3, X8
+	PADDQ      X3, X8
+	PSRLQ      $0x3f, X3
+	PXOR       X8, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X2, X8
+	PUNPCKLQDQ X2, X9
+	PUNPCKHQDQ X3, X2
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X3, X9
+	MOVO       X8, X3
+	MOVO       X6, X8
+	PUNPCKHQDQ X9, X3
+	PUNPCKLQDQ X7, X9
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X7
+	MOVOU      X0, 96(AX)
+	MOVOU      X1, 224(AX)
+	MOVOU      X2, 352(AX)
+	MOVOU      X3, 480(AX)
+	MOVOU      X4, 608(AX)
+	MOVOU      X5, 736(AX)
+	MOVOU      X6, 864(AX)
+	MOVOU      X7, 992(AX)
+	MOVOU      112(AX), X0
+	MOVOU      240(AX), X1
+	MOVOU      368(AX), X2
+	MOVOU      496(AX), X3
+	MOVOU      624(AX), X4
+	MOVOU      752(AX), X5
+	MOVOU      880(AX), X6
+	MOVOU      1008(AX), X7
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFD     $0xb1, X6, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	PSHUFB     X10, X2
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFB     X11, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	MOVO       X2, X8
+	PADDQ      X2, X8
+	PSRLQ      $0x3f, X2
+	PXOR       X8, X2
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X7, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	PSHUFB     X10, X3
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFB     X11, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	MOVO       X3, X8
+	PADDQ      X3, X8
+	PSRLQ      $0x3f, X3
+	PXOR       X8, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X6, X8
+	PUNPCKLQDQ X6, X9
+	PUNPCKHQDQ X7, X6
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X7, X9
+	MOVO       X8, X7
+	MOVO       X2, X8
+	PUNPCKHQDQ X9, X7
+	PUNPCKLQDQ X3, X9
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X3
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFD     $0xb1, X6, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	PSHUFB     X10, X2
+	MOVO       X0, X8
+	PMULULQ    X2, X8
+	PADDQ      X2, X0
+	PADDQ      X8, X0
+	PADDQ      X8, X0
+	PXOR       X0, X6
+	PSHUFB     X11, X6
+	MOVO       X4, X8
+	PMULULQ    X6, X8
+	PADDQ      X6, X4
+	PADDQ      X8, X4
+	PADDQ      X8, X4
+	PXOR       X4, X2
+	MOVO       X2, X8
+	PADDQ      X2, X8
+	PSRLQ      $0x3f, X2
+	PXOR       X8, X2
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X7, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	PSHUFB     X10, X3
+	MOVO       X1, X8
+	PMULULQ    X3, X8
+	PADDQ      X3, X1
+	PADDQ      X8, X1
+	PADDQ      X8, X1
+	PXOR       X1, X7
+	PSHUFB     X11, X7
+	MOVO       X5, X8
+	PMULULQ    X7, X8
+	PADDQ      X7, X5
+	PADDQ      X8, X5
+	PADDQ      X8, X5
+	PXOR       X5, X3
+	MOVO       X3, X8
+	PADDQ      X3, X8
+	PSRLQ      $0x3f, X3
+	PXOR       X8, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X2, X8
+	PUNPCKLQDQ X2, X9
+	PUNPCKHQDQ X3, X2
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X3, X9
+	MOVO       X8, X3
+	MOVO       X6, X8
+	PUNPCKHQDQ X9, X3
+	PUNPCKLQDQ X7, X9
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X7
+	MOVOU      X0, 112(AX)
+	MOVOU      X1, 240(AX)
+	MOVOU      X2, 368(AX)
+	MOVOU      X3, 496(AX)
+	MOVOU      X4, 624(AX)
+	MOVOU      X5, 752(AX)
+	MOVOU      X6, 880(AX)
+	MOVOU      X7, 1008(AX)
+	RET
 
-	BLAMKA_ROUND_0(AX, 0, X8, X9, X10, X11)
-	BLAMKA_ROUND_0(AX, 16, X8, X9, X10, X11)
-	BLAMKA_ROUND_0(AX, 32, X8, X9, X10, X11)
-	BLAMKA_ROUND_0(AX, 48, X8, X9, X10, X11)
-	BLAMKA_ROUND_0(AX, 64, X8, X9, X10, X11)
-	BLAMKA_ROUND_0(AX, 80, X8, X9, X10, X11)
-	BLAMKA_ROUND_0(AX, 96, X8, X9, X10, X11)
-	BLAMKA_ROUND_0(AX, 112, X8, X9, X10, X11)
+DATA ·c40<>+0(SB)/8, $0x0201000706050403 // bytes 0-7 (little-endian): source byte indices 3,4,5,6,7,0,1,2
+DATA ·c40<>+8(SB)/8, $0x0a09080f0e0d0c0b // bytes 8-15: source byte indices 11,12,13,14,15,8,9,10 (same pattern, upper 64-bit lane)
+GLOBL ·c40<>(SB), RODATA|NOPTR, $16 // file-local 16-byte read-only constant; as a PSHUFB control this pattern rotates each 64-bit lane right by 24 bits (left by 40) - NOTE(review): the load site is outside this hunk, confirm usage
 
-	BLAMKA_ROUND_1(AX, 0, X8, X9, X10, X11)
-	BLAMKA_ROUND_1(AX, 2, X8, X9, X10, X11)
-	BLAMKA_ROUND_1(AX, 4, X8, X9, X10, X11)
-	BLAMKA_ROUND_1(AX, 6, X8, X9, X10, X11)
-	BLAMKA_ROUND_1(AX, 8, X8, X9, X10, X11)
-	BLAMKA_ROUND_1(AX, 10, X8, X9, X10, X11)
-	BLAMKA_ROUND_1(AX, 12, X8, X9, X10, X11)
-	BLAMKA_ROUND_1(AX, 14, X8, X9, X10, X11)
-	RET
+DATA ·c48<>+0(SB)/8, $0x0100070605040302 // bytes 0-7 (little-endian): source byte indices 2,3,4,5,6,7,0,1
+DATA ·c48<>+8(SB)/8, $0x09080f0e0d0c0b0a // bytes 8-15: source byte indices 10,11,12,13,14,15,8,9 (same pattern, upper 64-bit lane)
+GLOBL ·c48<>(SB), RODATA|NOPTR, $16 // file-local 16-byte read-only constant; as a PSHUFB control this pattern rotates each 64-bit lane right by 16 bits (left by 48) - NOTE(review): the load site is outside this hunk, confirm usage
 
-// func mixBlocksSSE2(out, a, b, c *block)
-TEXT ·mixBlocksSSE2(SB), 4, $0-32
+// func mixBlocksSSE2(out *block, a *block, b *block, c *block)
+// Requires: SSE2
+TEXT ·mixBlocksSSE2(SB), NOSPLIT, $0-32 // writes out = a ^ b ^ c over 1024 bytes; no local frame, 32 bytes of arguments (four 8-byte pointers)
 	MOVQ out+0(FP), DX
 	MOVQ a+8(FP), AX
 	MOVQ b+16(FP), BX
 	MOVQ c+24(FP), CX
-	MOVQ $128, DI
+	MOVQ $0x00000080, DI // DI = 128, decremented by 2 per 16-byte step => 64 iterations, 1024 bytes per pointer
 
 loop:
-	MOVOU 0(AX), X0
-	MOVOU 0(BX), X1
-	MOVOU 0(CX), X2
+	MOVOU (AX), X0 // X0 = next 16 bytes of a (unaligned load)
+	MOVOU (BX), X1 // X1 = next 16 bytes of b
+	MOVOU (CX), X2 // X2 = next 16 bytes of c
 	PXOR  X1, X0
 	PXOR  X2, X0
-	MOVOU X0, 0(DX)
-	ADDQ  $16, AX
-	ADDQ  $16, BX
-	ADDQ  $16, CX
-	ADDQ  $16, DX
-	SUBQ  $2, DI
+	MOVOU X0, (DX) // store a ^ b ^ c into out; the previous contents of out are not read
+	ADDQ  $0x10, AX // advance all four pointers by 16 bytes
+	ADDQ  $0x10, BX
+	ADDQ  $0x10, CX
+	ADDQ  $0x10, DX
+	SUBQ  $0x02, DI // two 8-byte words consumed; the flags set here drive the JA below (exit when DI reaches 0)
 	JA    loop
 	RET
 
-// func xorBlocksSSE2(out, a, b, c *block)
-TEXT ·xorBlocksSSE2(SB), 4, $0-32
+// func xorBlocksSSE2(out *block, a *block, b *block, c *block)
+// Requires: SSE2
+TEXT ·xorBlocksSSE2(SB), NOSPLIT, $0-32 // updates out ^= a ^ b ^ c over 1024 bytes; no local frame, 32 bytes of arguments (four 8-byte pointers)
 	MOVQ out+0(FP), DX
 	MOVQ a+8(FP), AX
 	MOVQ b+16(FP), BX
 	MOVQ c+24(FP), CX
-	MOVQ $128, DI
+	MOVQ $0x00000080, DI // DI = 128, decremented by 2 per 16-byte step => 64 iterations, 1024 bytes per pointer
 
 loop:
-	MOVOU 0(AX), X0
-	MOVOU 0(BX), X1
-	MOVOU 0(CX), X2
-	MOVOU 0(DX), X3
+	MOVOU (AX), X0 // X0 = next 16 bytes of a (unaligned load)
+	MOVOU (BX), X1 // X1 = next 16 bytes of b
+	MOVOU (CX), X2 // X2 = next 16 bytes of c
+	MOVOU (DX), X3 // X3 = current 16 bytes of out, folded into the result below
 	PXOR  X1, X0
 	PXOR  X2, X0
 	PXOR  X3, X0
-	MOVOU X0, 0(DX)
-	ADDQ  $16, AX
-	ADDQ  $16, BX
-	ADDQ  $16, CX
-	ADDQ  $16, DX
-	SUBQ  $2, DI
+	MOVOU X0, (DX) // store a ^ b ^ c ^ out back into out
+	ADDQ  $0x10, AX // advance all four pointers by 16 bytes
+	ADDQ  $0x10, BX
+	ADDQ  $0x10, CX
+	ADDQ  $0x10, DX
+	SUBQ  $0x02, DI // two 8-byte words consumed; the flags set here drive the JA below (exit when DI reaches 0)
 	JA    loop
 	RET
diff --git a/vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.s b/vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.s
index 9ae8206c..f75162e0 100644
--- a/vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.s
+++ b/vendor/golang.org/x/crypto/blake2b/blake2bAVX2_amd64.s
@@ -1,722 +1,4517 @@
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
+// Code generated by command: go run blake2bAVX2_amd64_asm.go -out ../../blake2bAVX2_amd64.s -pkg blake2b. DO NOT EDIT.
 
 //go:build amd64 && gc && !purego
 
 #include "textflag.h"
 
-DATA ·AVX2_iv0<>+0x00(SB)/8, $0x6a09e667f3bcc908
-DATA ·AVX2_iv0<>+0x08(SB)/8, $0xbb67ae8584caa73b
-DATA ·AVX2_iv0<>+0x10(SB)/8, $0x3c6ef372fe94f82b
-DATA ·AVX2_iv0<>+0x18(SB)/8, $0xa54ff53a5f1d36f1
-GLOBL ·AVX2_iv0<>(SB), (NOPTR+RODATA), $32
-
-DATA ·AVX2_iv1<>+0x00(SB)/8, $0x510e527fade682d1
-DATA ·AVX2_iv1<>+0x08(SB)/8, $0x9b05688c2b3e6c1f
-DATA ·AVX2_iv1<>+0x10(SB)/8, $0x1f83d9abfb41bd6b
-DATA ·AVX2_iv1<>+0x18(SB)/8, $0x5be0cd19137e2179
-GLOBL ·AVX2_iv1<>(SB), (NOPTR+RODATA), $32
-
-DATA ·AVX2_c40<>+0x00(SB)/8, $0x0201000706050403
-DATA ·AVX2_c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b
-DATA ·AVX2_c40<>+0x10(SB)/8, $0x0201000706050403
-DATA ·AVX2_c40<>+0x18(SB)/8, $0x0a09080f0e0d0c0b
-GLOBL ·AVX2_c40<>(SB), (NOPTR+RODATA), $32
-
-DATA ·AVX2_c48<>+0x00(SB)/8, $0x0100070605040302
-DATA ·AVX2_c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a
-DATA ·AVX2_c48<>+0x10(SB)/8, $0x0100070605040302
-DATA ·AVX2_c48<>+0x18(SB)/8, $0x09080f0e0d0c0b0a
-GLOBL ·AVX2_c48<>(SB), (NOPTR+RODATA), $32
-
-DATA ·AVX_iv0<>+0x00(SB)/8, $0x6a09e667f3bcc908
-DATA ·AVX_iv0<>+0x08(SB)/8, $0xbb67ae8584caa73b
-GLOBL ·AVX_iv0<>(SB), (NOPTR+RODATA), $16
-
-DATA ·AVX_iv1<>+0x00(SB)/8, $0x3c6ef372fe94f82b
-DATA ·AVX_iv1<>+0x08(SB)/8, $0xa54ff53a5f1d36f1
-GLOBL ·AVX_iv1<>(SB), (NOPTR+RODATA), $16
-
-DATA ·AVX_iv2<>+0x00(SB)/8, $0x510e527fade682d1
-DATA ·AVX_iv2<>+0x08(SB)/8, $0x9b05688c2b3e6c1f
-GLOBL ·AVX_iv2<>(SB), (NOPTR+RODATA), $16
-
-DATA ·AVX_iv3<>+0x00(SB)/8, $0x1f83d9abfb41bd6b
-DATA ·AVX_iv3<>+0x08(SB)/8, $0x5be0cd19137e2179
-GLOBL ·AVX_iv3<>(SB), (NOPTR+RODATA), $16
-
-DATA ·AVX_c40<>+0x00(SB)/8, $0x0201000706050403
-DATA ·AVX_c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b
-GLOBL ·AVX_c40<>(SB), (NOPTR+RODATA), $16
-
-DATA ·AVX_c48<>+0x00(SB)/8, $0x0100070605040302
-DATA ·AVX_c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a
-GLOBL ·AVX_c48<>(SB), (NOPTR+RODATA), $16
-
-#define VPERMQ_0x39_Y1_Y1 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xc9; BYTE $0x39
-#define VPERMQ_0x93_Y1_Y1 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xc9; BYTE $0x93
-#define VPERMQ_0x4E_Y2_Y2 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xd2; BYTE $0x4e
-#define VPERMQ_0x93_Y3_Y3 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xdb; BYTE $0x93
-#define VPERMQ_0x39_Y3_Y3 BYTE $0xc4; BYTE $0xe3; BYTE $0xfd; BYTE $0x00; BYTE $0xdb; BYTE $0x39
-
-#define ROUND_AVX2(m0, m1, m2, m3, t, c40, c48) \
-	VPADDQ  m0, Y0, Y0;   \
-	VPADDQ  Y1, Y0, Y0;   \
-	VPXOR   Y0, Y3, Y3;   \
-	VPSHUFD $-79, Y3, Y3; \
-	VPADDQ  Y3, Y2, Y2;   \
-	VPXOR   Y2, Y1, Y1;   \
-	VPSHUFB c40, Y1, Y1;  \
-	VPADDQ  m1, Y0, Y0;   \
-	VPADDQ  Y1, Y0, Y0;   \
-	VPXOR   Y0, Y3, Y3;   \
-	VPSHUFB c48, Y3, Y3;  \
-	VPADDQ  Y3, Y2, Y2;   \
-	VPXOR   Y2, Y1, Y1;   \
-	VPADDQ  Y1, Y1, t;    \
-	VPSRLQ  $63, Y1, Y1;  \
-	VPXOR   t, Y1, Y1;    \
-	VPERMQ_0x39_Y1_Y1;    \
-	VPERMQ_0x4E_Y2_Y2;    \
-	VPERMQ_0x93_Y3_Y3;    \
-	VPADDQ  m2, Y0, Y0;   \
-	VPADDQ  Y1, Y0, Y0;   \
-	VPXOR   Y0, Y3, Y3;   \
-	VPSHUFD $-79, Y3, Y3; \
-	VPADDQ  Y3, Y2, Y2;   \
-	VPXOR   Y2, Y1, Y1;   \
-	VPSHUFB c40, Y1, Y1;  \
-	VPADDQ  m3, Y0, Y0;   \
-	VPADDQ  Y1, Y0, Y0;   \
-	VPXOR   Y0, Y3, Y3;   \
-	VPSHUFB c48, Y3, Y3;  \
-	VPADDQ  Y3, Y2, Y2;   \
-	VPXOR   Y2, Y1, Y1;   \
-	VPADDQ  Y1, Y1, t;    \
-	VPSRLQ  $63, Y1, Y1;  \
-	VPXOR   t, Y1, Y1;    \
-	VPERMQ_0x39_Y3_Y3;    \
-	VPERMQ_0x4E_Y2_Y2;    \
-	VPERMQ_0x93_Y1_Y1
-
-#define VMOVQ_SI_X11_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x1E
-#define VMOVQ_SI_X12_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x26
-#define VMOVQ_SI_X13_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x2E
-#define VMOVQ_SI_X14_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x36
-#define VMOVQ_SI_X15_0 BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x3E
-
-#define VMOVQ_SI_X11(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x5E; BYTE $n
-#define VMOVQ_SI_X12(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x66; BYTE $n
-#define VMOVQ_SI_X13(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x6E; BYTE $n
-#define VMOVQ_SI_X14(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x76; BYTE $n
-#define VMOVQ_SI_X15(n) BYTE $0xC5; BYTE $0x7A; BYTE $0x7E; BYTE $0x7E; BYTE $n
-
-#define VPINSRQ_1_SI_X11_0 BYTE $0xC4; BYTE $0x63; BYTE $0xA1; BYTE $0x22; BYTE $0x1E; BYTE $0x01
-#define VPINSRQ_1_SI_X12_0 BYTE $0xC4; BYTE $0x63; BYTE $0x99; BYTE $0x22; BYTE $0x26; BYTE $0x01
-#define VPINSRQ_1_SI_X13_0 BYTE $0xC4; BYTE $0x63; BYTE $0x91; BYTE $0x22; BYTE $0x2E; BYTE $0x01
-#define VPINSRQ_1_SI_X14_0 BYTE $0xC4; BYTE $0x63; BYTE $0x89; BYTE $0x22; BYTE $0x36; BYTE $0x01
-#define VPINSRQ_1_SI_X15_0 BYTE $0xC4; BYTE $0x63; BYTE $0x81; BYTE $0x22; BYTE $0x3E; BYTE $0x01
-
-#define VPINSRQ_1_SI_X11(n) BYTE $0xC4; BYTE $0x63; BYTE $0xA1; BYTE $0x22; BYTE $0x5E; BYTE $n; BYTE $0x01
-#define VPINSRQ_1_SI_X12(n) BYTE $0xC4; BYTE $0x63; BYTE $0x99; BYTE $0x22; BYTE $0x66; BYTE $n; BYTE $0x01
-#define VPINSRQ_1_SI_X13(n) BYTE $0xC4; BYTE $0x63; BYTE $0x91; BYTE $0x22; BYTE $0x6E; BYTE $n; BYTE $0x01
-#define VPINSRQ_1_SI_X14(n) BYTE $0xC4; BYTE $0x63; BYTE $0x89; BYTE $0x22; BYTE $0x76; BYTE $n; BYTE $0x01
-#define VPINSRQ_1_SI_X15(n) BYTE $0xC4; BYTE $0x63; BYTE $0x81; BYTE $0x22; BYTE $0x7E; BYTE $n; BYTE $0x01
-
-#define VMOVQ_R8_X15 BYTE $0xC4; BYTE $0x41; BYTE $0xF9; BYTE $0x6E; BYTE $0xF8
-#define VPINSRQ_1_R9_X15 BYTE $0xC4; BYTE $0x43; BYTE $0x81; BYTE $0x22; BYTE $0xF9; BYTE $0x01
-
-// load msg: Y12 = (i0, i1, i2, i3)
-// i0, i1, i2, i3 must not be 0
-#define LOAD_MSG_AVX2_Y12(i0, i1, i2, i3) \
-	VMOVQ_SI_X12(i0*8);           \
-	VMOVQ_SI_X11(i2*8);           \
-	VPINSRQ_1_SI_X12(i1*8);       \
-	VPINSRQ_1_SI_X11(i3*8);       \
-	VINSERTI128 $1, X11, Y12, Y12
-
-// load msg: Y13 = (i0, i1, i2, i3)
-// i0, i1, i2, i3 must not be 0
-#define LOAD_MSG_AVX2_Y13(i0, i1, i2, i3) \
-	VMOVQ_SI_X13(i0*8);           \
-	VMOVQ_SI_X11(i2*8);           \
-	VPINSRQ_1_SI_X13(i1*8);       \
-	VPINSRQ_1_SI_X11(i3*8);       \
-	VINSERTI128 $1, X11, Y13, Y13
-
-// load msg: Y14 = (i0, i1, i2, i3)
-// i0, i1, i2, i3 must not be 0
-#define LOAD_MSG_AVX2_Y14(i0, i1, i2, i3) \
-	VMOVQ_SI_X14(i0*8);           \
-	VMOVQ_SI_X11(i2*8);           \
-	VPINSRQ_1_SI_X14(i1*8);       \
-	VPINSRQ_1_SI_X11(i3*8);       \
-	VINSERTI128 $1, X11, Y14, Y14
-
-// load msg: Y15 = (i0, i1, i2, i3)
-// i0, i1, i2, i3 must not be 0
-#define LOAD_MSG_AVX2_Y15(i0, i1, i2, i3) \
-	VMOVQ_SI_X15(i0*8);           \
-	VMOVQ_SI_X11(i2*8);           \
-	VPINSRQ_1_SI_X15(i1*8);       \
-	VPINSRQ_1_SI_X11(i3*8);       \
-	VINSERTI128 $1, X11, Y15, Y15
-
-#define LOAD_MSG_AVX2_0_2_4_6_1_3_5_7_8_10_12_14_9_11_13_15() \
-	VMOVQ_SI_X12_0;                   \
-	VMOVQ_SI_X11(4*8);                \
-	VPINSRQ_1_SI_X12(2*8);            \
-	VPINSRQ_1_SI_X11(6*8);            \
-	VINSERTI128 $1, X11, Y12, Y12;    \
-	LOAD_MSG_AVX2_Y13(1, 3, 5, 7);    \
-	LOAD_MSG_AVX2_Y14(8, 10, 12, 14); \
-	LOAD_MSG_AVX2_Y15(9, 11, 13, 15)
-
-#define LOAD_MSG_AVX2_14_4_9_13_10_8_15_6_1_0_11_5_12_2_7_3() \
-	LOAD_MSG_AVX2_Y12(14, 4, 9, 13); \
-	LOAD_MSG_AVX2_Y13(10, 8, 15, 6); \
-	VMOVQ_SI_X11(11*8);              \
-	VPSHUFD     $0x4E, 0*8(SI), X14; \
-	VPINSRQ_1_SI_X11(5*8);           \
-	VINSERTI128 $1, X11, Y14, Y14;   \
-	LOAD_MSG_AVX2_Y15(12, 2, 7, 3)
-
-#define LOAD_MSG_AVX2_11_12_5_15_8_0_2_13_10_3_7_9_14_6_1_4() \
-	VMOVQ_SI_X11(5*8);              \
-	VMOVDQU     11*8(SI), X12;      \
-	VPINSRQ_1_SI_X11(15*8);         \
-	VINSERTI128 $1, X11, Y12, Y12;  \
-	VMOVQ_SI_X13(8*8);              \
-	VMOVQ_SI_X11(2*8);              \
-	VPINSRQ_1_SI_X13_0;             \
-	VPINSRQ_1_SI_X11(13*8);         \
-	VINSERTI128 $1, X11, Y13, Y13;  \
-	LOAD_MSG_AVX2_Y14(10, 3, 7, 9); \
-	LOAD_MSG_AVX2_Y15(14, 6, 1, 4)
-
-#define LOAD_MSG_AVX2_7_3_13_11_9_1_12_14_2_5_4_15_6_10_0_8() \
-	LOAD_MSG_AVX2_Y12(7, 3, 13, 11); \
-	LOAD_MSG_AVX2_Y13(9, 1, 12, 14); \
-	LOAD_MSG_AVX2_Y14(2, 5, 4, 15);  \
-	VMOVQ_SI_X15(6*8);               \
-	VMOVQ_SI_X11_0;                  \
-	VPINSRQ_1_SI_X15(10*8);          \
-	VPINSRQ_1_SI_X11(8*8);           \
-	VINSERTI128 $1, X11, Y15, Y15
-
-#define LOAD_MSG_AVX2_9_5_2_10_0_7_4_15_14_11_6_3_1_12_8_13() \
-	LOAD_MSG_AVX2_Y12(9, 5, 2, 10);  \
-	VMOVQ_SI_X13_0;                  \
-	VMOVQ_SI_X11(4*8);               \
-	VPINSRQ_1_SI_X13(7*8);           \
-	VPINSRQ_1_SI_X11(15*8);          \
-	VINSERTI128 $1, X11, Y13, Y13;   \
-	LOAD_MSG_AVX2_Y14(14, 11, 6, 3); \
-	LOAD_MSG_AVX2_Y15(1, 12, 8, 13)
-
-#define LOAD_MSG_AVX2_2_6_0_8_12_10_11_3_4_7_15_1_13_5_14_9() \
-	VMOVQ_SI_X12(2*8);                \
-	VMOVQ_SI_X11_0;                   \
-	VPINSRQ_1_SI_X12(6*8);            \
-	VPINSRQ_1_SI_X11(8*8);            \
-	VINSERTI128 $1, X11, Y12, Y12;    \
-	LOAD_MSG_AVX2_Y13(12, 10, 11, 3); \
-	LOAD_MSG_AVX2_Y14(4, 7, 15, 1);   \
-	LOAD_MSG_AVX2_Y15(13, 5, 14, 9)
-
-#define LOAD_MSG_AVX2_12_1_14_4_5_15_13_10_0_6_9_8_7_3_2_11() \
-	LOAD_MSG_AVX2_Y12(12, 1, 14, 4);  \
-	LOAD_MSG_AVX2_Y13(5, 15, 13, 10); \
-	VMOVQ_SI_X14_0;                   \
-	VPSHUFD     $0x4E, 8*8(SI), X11;  \
-	VPINSRQ_1_SI_X14(6*8);            \
-	VINSERTI128 $1, X11, Y14, Y14;    \
-	LOAD_MSG_AVX2_Y15(7, 3, 2, 11)
-
-#define LOAD_MSG_AVX2_13_7_12_3_11_14_1_9_5_15_8_2_0_4_6_10() \
-	LOAD_MSG_AVX2_Y12(13, 7, 12, 3); \
-	LOAD_MSG_AVX2_Y13(11, 14, 1, 9); \
-	LOAD_MSG_AVX2_Y14(5, 15, 8, 2);  \
-	VMOVQ_SI_X15_0;                  \
-	VMOVQ_SI_X11(6*8);               \
-	VPINSRQ_1_SI_X15(4*8);           \
-	VPINSRQ_1_SI_X11(10*8);          \
-	VINSERTI128 $1, X11, Y15, Y15
-
-#define LOAD_MSG_AVX2_6_14_11_0_15_9_3_8_12_13_1_10_2_7_4_5() \
-	VMOVQ_SI_X12(6*8);              \
-	VMOVQ_SI_X11(11*8);             \
-	VPINSRQ_1_SI_X12(14*8);         \
-	VPINSRQ_1_SI_X11_0;             \
-	VINSERTI128 $1, X11, Y12, Y12;  \
-	LOAD_MSG_AVX2_Y13(15, 9, 3, 8); \
-	VMOVQ_SI_X11(1*8);              \
-	VMOVDQU     12*8(SI), X14;      \
-	VPINSRQ_1_SI_X11(10*8);         \
-	VINSERTI128 $1, X11, Y14, Y14;  \
-	VMOVQ_SI_X15(2*8);              \
-	VMOVDQU     4*8(SI), X11;       \
-	VPINSRQ_1_SI_X15(7*8);          \
-	VINSERTI128 $1, X11, Y15, Y15
-
-#define LOAD_MSG_AVX2_10_8_7_1_2_4_6_5_15_9_3_13_11_14_12_0() \
-	LOAD_MSG_AVX2_Y12(10, 8, 7, 1);  \
-	VMOVQ_SI_X13(2*8);               \
-	VPSHUFD     $0x4E, 5*8(SI), X11; \
-	VPINSRQ_1_SI_X13(4*8);           \
-	VINSERTI128 $1, X11, Y13, Y13;   \
-	LOAD_MSG_AVX2_Y14(15, 9, 3, 13); \
-	VMOVQ_SI_X15(11*8);              \
-	VMOVQ_SI_X11(12*8);              \
-	VPINSRQ_1_SI_X15(14*8);          \
-	VPINSRQ_1_SI_X11_0;              \
-	VINSERTI128 $1, X11, Y15, Y15
-
 // func hashBlocksAVX2(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte)
-TEXT ·hashBlocksAVX2(SB), 4, $320-48 // frame size = 288 + 32 byte alignment
-	MOVQ h+0(FP), AX
-	MOVQ c+8(FP), BX
-	MOVQ flag+16(FP), CX
-	MOVQ blocks_base+24(FP), SI
-	MOVQ blocks_len+32(FP), DI
-
-	MOVQ SP, DX
-	ADDQ $31, DX
-	ANDQ $~31, DX
-
-	MOVQ CX, 16(DX)
-	XORQ CX, CX
-	MOVQ CX, 24(DX)
-
-	VMOVDQU ·AVX2_c40<>(SB), Y4
-	VMOVDQU ·AVX2_c48<>(SB), Y5
-
-	VMOVDQU 0(AX), Y8
+// Requires: AVX, AVX2
+TEXT ·hashBlocksAVX2(SB), NOSPLIT, $320-48
+	MOVQ    h+0(FP), AX
+	MOVQ    c+8(FP), BX
+	MOVQ    flag+16(FP), CX
+	MOVQ    blocks_base+24(FP), SI
+	MOVQ    blocks_len+32(FP), DI
+	MOVQ    SP, DX
+	ADDQ    $+31, DX
+	ANDQ    $-32, DX
+	MOVQ    CX, 16(DX)
+	XORQ    CX, CX
+	MOVQ    CX, 24(DX)
+	VMOVDQU ·AVX2_c40<>+0(SB), Y4
+	VMOVDQU ·AVX2_c48<>+0(SB), Y5
+	VMOVDQU (AX), Y8
 	VMOVDQU 32(AX), Y9
-	VMOVDQU ·AVX2_iv0<>(SB), Y6
-	VMOVDQU ·AVX2_iv1<>(SB), Y7
-
-	MOVQ 0(BX), R8
-	MOVQ 8(BX), R9
-	MOVQ R9, 8(DX)
+	VMOVDQU ·AVX2_iv0<>+0(SB), Y6
+	VMOVDQU ·AVX2_iv1<>+0(SB), Y7
+	MOVQ    (BX), R8
+	MOVQ    8(BX), R9
+	MOVQ    R9, 8(DX)
 
 loop:
-	ADDQ $128, R8
-	MOVQ R8, 0(DX)
-	CMPQ R8, $128
+	ADDQ $0x80, R8
+	MOVQ R8, (DX)
+	CMPQ R8, $0x80
 	JGE  noinc
 	INCQ R9
 	MOVQ R9, 8(DX)
 
 noinc:
-	VMOVDQA Y8, Y0
-	VMOVDQA Y9, Y1
-	VMOVDQA Y6, Y2
-	VPXOR   0(DX), Y7, Y3
-
-	LOAD_MSG_AVX2_0_2_4_6_1_3_5_7_8_10_12_14_9_11_13_15()
-	VMOVDQA Y12, 32(DX)
-	VMOVDQA Y13, 64(DX)
-	VMOVDQA Y14, 96(DX)
-	VMOVDQA Y15, 128(DX)
-	ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
-	LOAD_MSG_AVX2_14_4_9_13_10_8_15_6_1_0_11_5_12_2_7_3()
-	VMOVDQA Y12, 160(DX)
-	VMOVDQA Y13, 192(DX)
-	VMOVDQA Y14, 224(DX)
-	VMOVDQA Y15, 256(DX)
-
-	ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
-	LOAD_MSG_AVX2_11_12_5_15_8_0_2_13_10_3_7_9_14_6_1_4()
-	ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
-	LOAD_MSG_AVX2_7_3_13_11_9_1_12_14_2_5_4_15_6_10_0_8()
-	ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
-	LOAD_MSG_AVX2_9_5_2_10_0_7_4_15_14_11_6_3_1_12_8_13()
-	ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
-	LOAD_MSG_AVX2_2_6_0_8_12_10_11_3_4_7_15_1_13_5_14_9()
-	ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
-	LOAD_MSG_AVX2_12_1_14_4_5_15_13_10_0_6_9_8_7_3_2_11()
-	ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
-	LOAD_MSG_AVX2_13_7_12_3_11_14_1_9_5_15_8_2_0_4_6_10()
-	ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
-	LOAD_MSG_AVX2_6_14_11_0_15_9_3_8_12_13_1_10_2_7_4_5()
-	ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
-	LOAD_MSG_AVX2_10_8_7_1_2_4_6_5_15_9_3_13_11_14_12_0()
-	ROUND_AVX2(Y12, Y13, Y14, Y15, Y10, Y4, Y5)
-
-	ROUND_AVX2(32(DX), 64(DX), 96(DX), 128(DX), Y10, Y4, Y5)
-	ROUND_AVX2(160(DX), 192(DX), 224(DX), 256(DX), Y10, Y4, Y5)
-
-	VPXOR Y0, Y8, Y8
-	VPXOR Y1, Y9, Y9
-	VPXOR Y2, Y8, Y8
-	VPXOR Y3, Y9, Y9
-
-	LEAQ 128(SI), SI
-	SUBQ $128, DI
-	JNE  loop
-
-	MOVQ R8, 0(BX)
-	MOVQ R9, 8(BX)
-
-	VMOVDQU Y8, 0(AX)
-	VMOVDQU Y9, 32(AX)
+	VMOVDQA     Y8, Y0
+	VMOVDQA     Y9, Y1
+	VMOVDQA     Y6, Y2
+	VPXOR       (DX), Y7, Y3
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x26
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x20
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x99
+	BYTE        $0x22
+	BYTE        $0x66
+	BYTE        $0x10
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x30
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y12, Y12
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x6e
+	BYTE        $0x08
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x28
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x91
+	BYTE        $0x22
+	BYTE        $0x6e
+	BYTE        $0x18
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x38
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y13, Y13
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x76
+	BYTE        $0x40
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x60
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x89
+	BYTE        $0x22
+	BYTE        $0x76
+	BYTE        $0x50
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x70
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y14, Y14
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x7e
+	BYTE        $0x48
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x68
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x81
+	BYTE        $0x22
+	BYTE        $0x7e
+	BYTE        $0x58
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x78
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y15, Y15
+	VMOVDQA     Y12, 32(DX)
+	VMOVDQA     Y13, 64(DX)
+	VMOVDQA     Y14, 96(DX)
+	VMOVDQA     Y15, 128(DX)
+	VPADDQ      Y12, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFD     $-79, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPSHUFB     Y4, Y1, Y1
+	VPADDQ      Y13, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFB     Y5, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPADDQ      Y1, Y1, Y10
+	VPSRLQ      $0x3f, Y1, Y1
+	VPXOR       Y10, Y1, Y1
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xc9
+	BYTE        $0x39
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xd2
+	BYTE        $0x4e
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xdb
+	BYTE        $0x93
+	VPADDQ      Y14, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFD     $-79, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPSHUFB     Y4, Y1, Y1
+	VPADDQ      Y15, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFB     Y5, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPADDQ      Y1, Y1, Y10
+	VPSRLQ      $0x3f, Y1, Y1
+	VPXOR       Y10, Y1, Y1
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xdb
+	BYTE        $0x39
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xd2
+	BYTE        $0x4e
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xc9
+	BYTE        $0x93
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x66
+	BYTE        $0x70
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x48
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x99
+	BYTE        $0x22
+	BYTE        $0x66
+	BYTE        $0x20
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x68
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y12, Y12
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x6e
+	BYTE        $0x50
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x78
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x91
+	BYTE        $0x22
+	BYTE        $0x6e
+	BYTE        $0x40
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x30
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y13, Y13
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x58
+	VPSHUFD     $0x4e, (SI), X14
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x28
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y14, Y14
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x7e
+	BYTE        $0x60
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x38
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x81
+	BYTE        $0x22
+	BYTE        $0x7e
+	BYTE        $0x10
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x18
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y15, Y15
+	VMOVDQA     Y12, 160(DX)
+	VMOVDQA     Y13, 192(DX)
+	VMOVDQA     Y14, 224(DX)
+	VMOVDQA     Y15, 256(DX)
+	VPADDQ      Y12, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFD     $-79, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPSHUFB     Y4, Y1, Y1
+	VPADDQ      Y13, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFB     Y5, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPADDQ      Y1, Y1, Y10
+	VPSRLQ      $0x3f, Y1, Y1
+	VPXOR       Y10, Y1, Y1
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xc9
+	BYTE        $0x39
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xd2
+	BYTE        $0x4e
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xdb
+	BYTE        $0x93
+	VPADDQ      Y14, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFD     $-79, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPSHUFB     Y4, Y1, Y1
+	VPADDQ      Y15, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFB     Y5, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPADDQ      Y1, Y1, Y10
+	VPSRLQ      $0x3f, Y1, Y1
+	VPXOR       Y10, Y1, Y1
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xdb
+	BYTE        $0x39
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xd2
+	BYTE        $0x4e
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xc9
+	BYTE        $0x93
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x28
+	VMOVDQU     88(SI), X12
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x78
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y12, Y12
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x6e
+	BYTE        $0x40
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x10
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x91
+	BYTE        $0x22
+	BYTE        $0x2e
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x68
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y13, Y13
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x76
+	BYTE        $0x50
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x38
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x89
+	BYTE        $0x22
+	BYTE        $0x76
+	BYTE        $0x18
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x48
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y14, Y14
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x7e
+	BYTE        $0x70
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x08
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x81
+	BYTE        $0x22
+	BYTE        $0x7e
+	BYTE        $0x30
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x20
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y15, Y15
+	VPADDQ      Y12, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFD     $-79, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPSHUFB     Y4, Y1, Y1
+	VPADDQ      Y13, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFB     Y5, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPADDQ      Y1, Y1, Y10
+	VPSRLQ      $0x3f, Y1, Y1
+	VPXOR       Y10, Y1, Y1
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xc9
+	BYTE        $0x39
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xd2
+	BYTE        $0x4e
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xdb
+	BYTE        $0x93
+	VPADDQ      Y14, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFD     $-79, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPSHUFB     Y4, Y1, Y1
+	VPADDQ      Y15, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFB     Y5, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPADDQ      Y1, Y1, Y10
+	VPSRLQ      $0x3f, Y1, Y1
+	VPXOR       Y10, Y1, Y1
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xdb
+	BYTE        $0x39
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xd2
+	BYTE        $0x4e
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xc9
+	BYTE        $0x93
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x66
+	BYTE        $0x38
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x68
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x99
+	BYTE        $0x22
+	BYTE        $0x66
+	BYTE        $0x18
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x58
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y12, Y12
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x6e
+	BYTE        $0x48
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x60
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x91
+	BYTE        $0x22
+	BYTE        $0x6e
+	BYTE        $0x08
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x70
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y13, Y13
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x76
+	BYTE        $0x10
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x20
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x89
+	BYTE        $0x22
+	BYTE        $0x76
+	BYTE        $0x28
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x78
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y14, Y14
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x7e
+	BYTE        $0x30
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x1e
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x81
+	BYTE        $0x22
+	BYTE        $0x7e
+	BYTE        $0x50
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x40
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y15, Y15
+	VPADDQ      Y12, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFD     $-79, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPSHUFB     Y4, Y1, Y1
+	VPADDQ      Y13, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFB     Y5, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPADDQ      Y1, Y1, Y10
+	VPSRLQ      $0x3f, Y1, Y1
+	VPXOR       Y10, Y1, Y1
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xc9
+	BYTE        $0x39
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xd2
+	BYTE        $0x4e
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xdb
+	BYTE        $0x93
+	VPADDQ      Y14, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFD     $-79, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPSHUFB     Y4, Y1, Y1
+	VPADDQ      Y15, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFB     Y5, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPADDQ      Y1, Y1, Y10
+	VPSRLQ      $0x3f, Y1, Y1
+	VPXOR       Y10, Y1, Y1
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xdb
+	BYTE        $0x39
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xd2
+	BYTE        $0x4e
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xc9
+	BYTE        $0x93
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x66
+	BYTE        $0x48
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x10
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x99
+	BYTE        $0x22
+	BYTE        $0x66
+	BYTE        $0x28
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x50
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y12, Y12
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x2e
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x20
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x91
+	BYTE        $0x22
+	BYTE        $0x6e
+	BYTE        $0x38
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x78
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y13, Y13
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x76
+	BYTE        $0x70
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x30
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x89
+	BYTE        $0x22
+	BYTE        $0x76
+	BYTE        $0x58
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x18
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y14, Y14
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x7e
+	BYTE        $0x08
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x40
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x81
+	BYTE        $0x22
+	BYTE        $0x7e
+	BYTE        $0x60
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x68
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y15, Y15
+	VPADDQ      Y12, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFD     $-79, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPSHUFB     Y4, Y1, Y1
+	VPADDQ      Y13, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFB     Y5, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPADDQ      Y1, Y1, Y10
+	VPSRLQ      $0x3f, Y1, Y1
+	VPXOR       Y10, Y1, Y1
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xc9
+	BYTE        $0x39
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xd2
+	BYTE        $0x4e
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xdb
+	BYTE        $0x93
+	VPADDQ      Y14, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFD     $-79, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPSHUFB     Y4, Y1, Y1
+	VPADDQ      Y15, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFB     Y5, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPADDQ      Y1, Y1, Y10
+	VPSRLQ      $0x3f, Y1, Y1
+	VPXOR       Y10, Y1, Y1
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xdb
+	BYTE        $0x39
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xd2
+	BYTE        $0x4e
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xc9
+	BYTE        $0x93
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x66
+	BYTE        $0x10
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x1e
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x99
+	BYTE        $0x22
+	BYTE        $0x66
+	BYTE        $0x30
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x40
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y12, Y12
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x6e
+	BYTE        $0x60
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x58
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x91
+	BYTE        $0x22
+	BYTE        $0x6e
+	BYTE        $0x50
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x18
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y13, Y13
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x76
+	BYTE        $0x20
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x78
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x89
+	BYTE        $0x22
+	BYTE        $0x76
+	BYTE        $0x38
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x08
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y14, Y14
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x7e
+	BYTE        $0x68
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x70
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x81
+	BYTE        $0x22
+	BYTE        $0x7e
+	BYTE        $0x28
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x48
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y15, Y15
+	VPADDQ      Y12, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFD     $-79, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPSHUFB     Y4, Y1, Y1
+	VPADDQ      Y13, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFB     Y5, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPADDQ      Y1, Y1, Y10
+	VPSRLQ      $0x3f, Y1, Y1
+	VPXOR       Y10, Y1, Y1
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xc9
+	BYTE        $0x39
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xd2
+	BYTE        $0x4e
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xdb
+	BYTE        $0x93
+	VPADDQ      Y14, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFD     $-79, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPSHUFB     Y4, Y1, Y1
+	VPADDQ      Y15, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFB     Y5, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPADDQ      Y1, Y1, Y10
+	VPSRLQ      $0x3f, Y1, Y1
+	VPXOR       Y10, Y1, Y1
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xdb
+	BYTE        $0x39
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xd2
+	BYTE        $0x4e
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xc9
+	BYTE        $0x93
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x66
+	BYTE        $0x60
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x70
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x99
+	BYTE        $0x22
+	BYTE        $0x66
+	BYTE        $0x08
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x20
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y12, Y12
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x6e
+	BYTE        $0x28
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x68
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x91
+	BYTE        $0x22
+	BYTE        $0x6e
+	BYTE        $0x78
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x50
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y13, Y13
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x36
+	VPSHUFD     $0x4e, 64(SI), X11
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x89
+	BYTE        $0x22
+	BYTE        $0x76
+	BYTE        $0x30
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y14, Y14
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x7e
+	BYTE        $0x38
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x10
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x81
+	BYTE        $0x22
+	BYTE        $0x7e
+	BYTE        $0x18
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x58
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y15, Y15
+	VPADDQ      Y12, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFD     $-79, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPSHUFB     Y4, Y1, Y1
+	VPADDQ      Y13, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFB     Y5, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPADDQ      Y1, Y1, Y10
+	VPSRLQ      $0x3f, Y1, Y1
+	VPXOR       Y10, Y1, Y1
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xc9
+	BYTE        $0x39
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xd2
+	BYTE        $0x4e
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xdb
+	BYTE        $0x93
+	VPADDQ      Y14, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFD     $-79, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPSHUFB     Y4, Y1, Y1
+	VPADDQ      Y15, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFB     Y5, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPADDQ      Y1, Y1, Y10
+	VPSRLQ      $0x3f, Y1, Y1
+	VPXOR       Y10, Y1, Y1
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xdb
+	BYTE        $0x39
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xd2
+	BYTE        $0x4e
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xc9
+	BYTE        $0x93
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x66
+	BYTE        $0x68
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x60
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x99
+	BYTE        $0x22
+	BYTE        $0x66
+	BYTE        $0x38
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x18
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y12, Y12
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x6e
+	BYTE        $0x58
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x08
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x91
+	BYTE        $0x22
+	BYTE        $0x6e
+	BYTE        $0x70
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x48
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y13, Y13
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x76
+	BYTE        $0x28
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x40
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x89
+	BYTE        $0x22
+	BYTE        $0x76
+	BYTE        $0x78
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x10
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y14, Y14
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x3e
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x30
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x81
+	BYTE        $0x22
+	BYTE        $0x7e
+	BYTE        $0x20
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x50
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y15, Y15
+	VPADDQ      Y12, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFD     $-79, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPSHUFB     Y4, Y1, Y1
+	VPADDQ      Y13, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFB     Y5, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPADDQ      Y1, Y1, Y10
+	VPSRLQ      $0x3f, Y1, Y1
+	VPXOR       Y10, Y1, Y1
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xc9
+	BYTE        $0x39
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xd2
+	BYTE        $0x4e
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xdb
+	BYTE        $0x93
+	VPADDQ      Y14, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFD     $-79, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPSHUFB     Y4, Y1, Y1
+	VPADDQ      Y15, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFB     Y5, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPADDQ      Y1, Y1, Y10
+	VPSRLQ      $0x3f, Y1, Y1
+	VPXOR       Y10, Y1, Y1
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xdb
+	BYTE        $0x39
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xd2
+	BYTE        $0x4e
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xc9
+	BYTE        $0x93
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x66
+	BYTE        $0x30
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x58
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x99
+	BYTE        $0x22
+	BYTE        $0x66
+	BYTE        $0x70
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x1e
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y12, Y12
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x6e
+	BYTE        $0x78
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x18
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x91
+	BYTE        $0x22
+	BYTE        $0x6e
+	BYTE        $0x48
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x40
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y13, Y13
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x08
+	VMOVDQU     96(SI), X14
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x50
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y14, Y14
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x7e
+	BYTE        $0x10
+	VMOVDQU     32(SI), X11
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x81
+	BYTE        $0x22
+	BYTE        $0x7e
+	BYTE        $0x38
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y15, Y15
+	VPADDQ      Y12, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFD     $-79, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPSHUFB     Y4, Y1, Y1
+	VPADDQ      Y13, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFB     Y5, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPADDQ      Y1, Y1, Y10
+	VPSRLQ      $0x3f, Y1, Y1
+	VPXOR       Y10, Y1, Y1
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xc9
+	BYTE        $0x39
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xd2
+	BYTE        $0x4e
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xdb
+	BYTE        $0x93
+	VPADDQ      Y14, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFD     $-79, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPSHUFB     Y4, Y1, Y1
+	VPADDQ      Y15, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFB     Y5, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPADDQ      Y1, Y1, Y10
+	VPSRLQ      $0x3f, Y1, Y1
+	VPXOR       Y10, Y1, Y1
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xdb
+	BYTE        $0x39
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xd2
+	BYTE        $0x4e
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xc9
+	BYTE        $0x93
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x66
+	BYTE        $0x50
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x38
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x99
+	BYTE        $0x22
+	BYTE        $0x66
+	BYTE        $0x40
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x08
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y12, Y12
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x6e
+	BYTE        $0x10
+	VPSHUFD     $0x4e, 40(SI), X11
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x91
+	BYTE        $0x22
+	BYTE        $0x6e
+	BYTE        $0x20
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y13, Y13
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x76
+	BYTE        $0x78
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x18
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x89
+	BYTE        $0x22
+	BYTE        $0x76
+	BYTE        $0x48
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x5e
+	BYTE        $0x68
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y14, Y14
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x7e
+	BYTE        $0x58
+	BYTE        $0xc5
+	BYTE        $0x7a
+	BYTE        $0x7e
+	BYTE        $0x5e
+	BYTE        $0x60
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0x81
+	BYTE        $0x22
+	BYTE        $0x7e
+	BYTE        $0x70
+	BYTE        $0x01
+	BYTE        $0xc4
+	BYTE        $0x63
+	BYTE        $0xa1
+	BYTE        $0x22
+	BYTE        $0x1e
+	BYTE        $0x01
+	VINSERTI128 $0x01, X11, Y15, Y15
+	VPADDQ      Y12, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFD     $-79, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPSHUFB     Y4, Y1, Y1
+	VPADDQ      Y13, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFB     Y5, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPADDQ      Y1, Y1, Y10
+	VPSRLQ      $0x3f, Y1, Y1
+	VPXOR       Y10, Y1, Y1
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xc9
+	BYTE        $0x39
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xd2
+	BYTE        $0x4e
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xdb
+	BYTE        $0x93
+	VPADDQ      Y14, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFD     $-79, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPSHUFB     Y4, Y1, Y1
+	VPADDQ      Y15, Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFB     Y5, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPADDQ      Y1, Y1, Y10
+	VPSRLQ      $0x3f, Y1, Y1
+	VPXOR       Y10, Y1, Y1
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xdb
+	BYTE        $0x39
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xd2
+	BYTE        $0x4e
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xc9
+	BYTE        $0x93
+	VPADDQ      32(DX), Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFD     $-79, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPSHUFB     Y4, Y1, Y1
+	VPADDQ      64(DX), Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFB     Y5, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPADDQ      Y1, Y1, Y10
+	VPSRLQ      $0x3f, Y1, Y1
+	VPXOR       Y10, Y1, Y1
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xc9
+	BYTE        $0x39
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xd2
+	BYTE        $0x4e
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xdb
+	BYTE        $0x93
+	VPADDQ      96(DX), Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFD     $-79, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPSHUFB     Y4, Y1, Y1
+	VPADDQ      128(DX), Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFB     Y5, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPADDQ      Y1, Y1, Y10
+	VPSRLQ      $0x3f, Y1, Y1
+	VPXOR       Y10, Y1, Y1
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xdb
+	BYTE        $0x39
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xd2
+	BYTE        $0x4e
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xc9
+	BYTE        $0x93
+	VPADDQ      160(DX), Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFD     $-79, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPSHUFB     Y4, Y1, Y1
+	VPADDQ      192(DX), Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFB     Y5, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPADDQ      Y1, Y1, Y10
+	VPSRLQ      $0x3f, Y1, Y1
+	VPXOR       Y10, Y1, Y1
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xc9
+	BYTE        $0x39
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xd2
+	BYTE        $0x4e
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xdb
+	BYTE        $0x93
+	VPADDQ      224(DX), Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFD     $-79, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPSHUFB     Y4, Y1, Y1
+	VPADDQ      256(DX), Y0, Y0
+	VPADDQ      Y1, Y0, Y0
+	VPXOR       Y0, Y3, Y3
+	VPSHUFB     Y5, Y3, Y3
+	VPADDQ      Y3, Y2, Y2
+	VPXOR       Y2, Y1, Y1
+	VPADDQ      Y1, Y1, Y10
+	VPSRLQ      $0x3f, Y1, Y1
+	VPXOR       Y10, Y1, Y1
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xdb
+	BYTE        $0x39
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xd2
+	BYTE        $0x4e
+	BYTE        $0xc4
+	BYTE        $0xe3
+	BYTE        $0xfd
+	BYTE        $0x00
+	BYTE        $0xc9
+	BYTE        $0x93
+	VPXOR       Y0, Y8, Y8
+	VPXOR       Y1, Y9, Y9
+	VPXOR       Y2, Y8, Y8
+	VPXOR       Y3, Y9, Y9
+	LEAQ        128(SI), SI
+	SUBQ        $0x80, DI
+	JNE         loop
+	MOVQ        R8, (BX)
+	MOVQ        R9, 8(BX)
+	VMOVDQU     Y8, (AX)
+	VMOVDQU     Y9, 32(AX)
 	VZEROUPPER
-
 	RET
 
-#define VPUNPCKLQDQ_X2_X2_X15 BYTE $0xC5; BYTE $0x69; BYTE $0x6C; BYTE $0xFA
-#define VPUNPCKLQDQ_X3_X3_X15 BYTE $0xC5; BYTE $0x61; BYTE $0x6C; BYTE $0xFB
-#define VPUNPCKLQDQ_X7_X7_X15 BYTE $0xC5; BYTE $0x41; BYTE $0x6C; BYTE $0xFF
-#define VPUNPCKLQDQ_X13_X13_X15 BYTE $0xC4; BYTE $0x41; BYTE $0x11; BYTE $0x6C; BYTE $0xFD
-#define VPUNPCKLQDQ_X14_X14_X15 BYTE $0xC4; BYTE $0x41; BYTE $0x09; BYTE $0x6C; BYTE $0xFE
-
-#define VPUNPCKHQDQ_X15_X2_X2 BYTE $0xC4; BYTE $0xC1; BYTE $0x69; BYTE $0x6D; BYTE $0xD7
-#define VPUNPCKHQDQ_X15_X3_X3 BYTE $0xC4; BYTE $0xC1; BYTE $0x61; BYTE $0x6D; BYTE $0xDF
-#define VPUNPCKHQDQ_X15_X6_X6 BYTE $0xC4; BYTE $0xC1; BYTE $0x49; BYTE $0x6D; BYTE $0xF7
-#define VPUNPCKHQDQ_X15_X7_X7 BYTE $0xC4; BYTE $0xC1; BYTE $0x41; BYTE $0x6D; BYTE $0xFF
-#define VPUNPCKHQDQ_X15_X3_X2 BYTE $0xC4; BYTE $0xC1; BYTE $0x61; BYTE $0x6D; BYTE $0xD7
-#define VPUNPCKHQDQ_X15_X7_X6 BYTE $0xC4; BYTE $0xC1; BYTE $0x41; BYTE $0x6D; BYTE $0xF7
-#define VPUNPCKHQDQ_X15_X13_X3 BYTE $0xC4; BYTE $0xC1; BYTE $0x11; BYTE $0x6D; BYTE $0xDF
-#define VPUNPCKHQDQ_X15_X13_X7 BYTE $0xC4; BYTE $0xC1; BYTE $0x11; BYTE $0x6D; BYTE $0xFF
-
-#define SHUFFLE_AVX() \
-	VMOVDQA X6, X13;         \
-	VMOVDQA X2, X14;         \
-	VMOVDQA X4, X6;          \
-	VPUNPCKLQDQ_X13_X13_X15; \
-	VMOVDQA X5, X4;          \
-	VMOVDQA X6, X5;          \
-	VPUNPCKHQDQ_X15_X7_X6;   \
-	VPUNPCKLQDQ_X7_X7_X15;   \
-	VPUNPCKHQDQ_X15_X13_X7;  \
-	VPUNPCKLQDQ_X3_X3_X15;   \
-	VPUNPCKHQDQ_X15_X2_X2;   \
-	VPUNPCKLQDQ_X14_X14_X15; \
-	VPUNPCKHQDQ_X15_X3_X3;   \
-
-#define SHUFFLE_AVX_INV() \
-	VMOVDQA X2, X13;         \
-	VMOVDQA X4, X14;         \
-	VPUNPCKLQDQ_X2_X2_X15;   \
-	VMOVDQA X5, X4;          \
-	VPUNPCKHQDQ_X15_X3_X2;   \
-	VMOVDQA X14, X5;         \
-	VPUNPCKLQDQ_X3_X3_X15;   \
-	VMOVDQA X6, X14;         \
-	VPUNPCKHQDQ_X15_X13_X3;  \
-	VPUNPCKLQDQ_X7_X7_X15;   \
-	VPUNPCKHQDQ_X15_X6_X6;   \
-	VPUNPCKLQDQ_X14_X14_X15; \
-	VPUNPCKHQDQ_X15_X7_X7;   \
-
-#define HALF_ROUND_AVX(v0, v1, v2, v3, v4, v5, v6, v7, m0, m1, m2, m3, t0, c40, c48) \
-	VPADDQ  m0, v0, v0;   \
-	VPADDQ  v2, v0, v0;   \
-	VPADDQ  m1, v1, v1;   \
-	VPADDQ  v3, v1, v1;   \
-	VPXOR   v0, v6, v6;   \
-	VPXOR   v1, v7, v7;   \
-	VPSHUFD $-79, v6, v6; \
-	VPSHUFD $-79, v7, v7; \
-	VPADDQ  v6, v4, v4;   \
-	VPADDQ  v7, v5, v5;   \
-	VPXOR   v4, v2, v2;   \
-	VPXOR   v5, v3, v3;   \
-	VPSHUFB c40, v2, v2;  \
-	VPSHUFB c40, v3, v3;  \
-	VPADDQ  m2, v0, v0;   \
-	VPADDQ  v2, v0, v0;   \
-	VPADDQ  m3, v1, v1;   \
-	VPADDQ  v3, v1, v1;   \
-	VPXOR   v0, v6, v6;   \
-	VPXOR   v1, v7, v7;   \
-	VPSHUFB c48, v6, v6;  \
-	VPSHUFB c48, v7, v7;  \
-	VPADDQ  v6, v4, v4;   \
-	VPADDQ  v7, v5, v5;   \
-	VPXOR   v4, v2, v2;   \
-	VPXOR   v5, v3, v3;   \
-	VPADDQ  v2, v2, t0;   \
-	VPSRLQ  $63, v2, v2;  \
-	VPXOR   t0, v2, v2;   \
-	VPADDQ  v3, v3, t0;   \
-	VPSRLQ  $63, v3, v3;  \
-	VPXOR   t0, v3, v3
-
-// load msg: X12 = (i0, i1), X13 = (i2, i3), X14 = (i4, i5), X15 = (i6, i7)
-// i0, i1, i2, i3, i4, i5, i6, i7 must not be 0
-#define LOAD_MSG_AVX(i0, i1, i2, i3, i4, i5, i6, i7) \
-	VMOVQ_SI_X12(i0*8);     \
-	VMOVQ_SI_X13(i2*8);     \
-	VMOVQ_SI_X14(i4*8);     \
-	VMOVQ_SI_X15(i6*8);     \
-	VPINSRQ_1_SI_X12(i1*8); \
-	VPINSRQ_1_SI_X13(i3*8); \
-	VPINSRQ_1_SI_X14(i5*8); \
-	VPINSRQ_1_SI_X15(i7*8)
-
-// load msg: X12 = (0, 2), X13 = (4, 6), X14 = (1, 3), X15 = (5, 7)
-#define LOAD_MSG_AVX_0_2_4_6_1_3_5_7() \
-	VMOVQ_SI_X12_0;        \
-	VMOVQ_SI_X13(4*8);     \
-	VMOVQ_SI_X14(1*8);     \
-	VMOVQ_SI_X15(5*8);     \
-	VPINSRQ_1_SI_X12(2*8); \
-	VPINSRQ_1_SI_X13(6*8); \
-	VPINSRQ_1_SI_X14(3*8); \
-	VPINSRQ_1_SI_X15(7*8)
-
-// load msg: X12 = (1, 0), X13 = (11, 5), X14 = (12, 2), X15 = (7, 3)
-#define LOAD_MSG_AVX_1_0_11_5_12_2_7_3() \
-	VPSHUFD $0x4E, 0*8(SI), X12; \
-	VMOVQ_SI_X13(11*8);          \
-	VMOVQ_SI_X14(12*8);          \
-	VMOVQ_SI_X15(7*8);           \
-	VPINSRQ_1_SI_X13(5*8);       \
-	VPINSRQ_1_SI_X14(2*8);       \
-	VPINSRQ_1_SI_X15(3*8)
-
-// load msg: X12 = (11, 12), X13 = (5, 15), X14 = (8, 0), X15 = (2, 13)
-#define LOAD_MSG_AVX_11_12_5_15_8_0_2_13() \
-	VMOVDQU 11*8(SI), X12;  \
-	VMOVQ_SI_X13(5*8);      \
-	VMOVQ_SI_X14(8*8);      \
-	VMOVQ_SI_X15(2*8);      \
-	VPINSRQ_1_SI_X13(15*8); \
-	VPINSRQ_1_SI_X14_0;     \
-	VPINSRQ_1_SI_X15(13*8)
-
-// load msg: X12 = (2, 5), X13 = (4, 15), X14 = (6, 10), X15 = (0, 8)
-#define LOAD_MSG_AVX_2_5_4_15_6_10_0_8() \
-	VMOVQ_SI_X12(2*8);      \
-	VMOVQ_SI_X13(4*8);      \
-	VMOVQ_SI_X14(6*8);      \
-	VMOVQ_SI_X15_0;         \
-	VPINSRQ_1_SI_X12(5*8);  \
-	VPINSRQ_1_SI_X13(15*8); \
-	VPINSRQ_1_SI_X14(10*8); \
-	VPINSRQ_1_SI_X15(8*8)
+DATA ·AVX2_c40<>+0(SB)/8, $0x0201000706050403
+DATA ·AVX2_c40<>+8(SB)/8, $0x0a09080f0e0d0c0b
+DATA ·AVX2_c40<>+16(SB)/8, $0x0201000706050403
+DATA ·AVX2_c40<>+24(SB)/8, $0x0a09080f0e0d0c0b
+GLOBL ·AVX2_c40<>(SB), RODATA|NOPTR, $32
 
-// load msg: X12 = (9, 5), X13 = (2, 10), X14 = (0, 7), X15 = (4, 15)
-#define LOAD_MSG_AVX_9_5_2_10_0_7_4_15() \
-	VMOVQ_SI_X12(9*8);      \
-	VMOVQ_SI_X13(2*8);      \
-	VMOVQ_SI_X14_0;         \
-	VMOVQ_SI_X15(4*8);      \
-	VPINSRQ_1_SI_X12(5*8);  \
-	VPINSRQ_1_SI_X13(10*8); \
-	VPINSRQ_1_SI_X14(7*8);  \
-	VPINSRQ_1_SI_X15(15*8)
+DATA ·AVX2_c48<>+0(SB)/8, $0x0100070605040302
+DATA ·AVX2_c48<>+8(SB)/8, $0x09080f0e0d0c0b0a
+DATA ·AVX2_c48<>+16(SB)/8, $0x0100070605040302
+DATA ·AVX2_c48<>+24(SB)/8, $0x09080f0e0d0c0b0a
+GLOBL ·AVX2_c48<>(SB), RODATA|NOPTR, $32
 
-// load msg: X12 = (2, 6), X13 = (0, 8), X14 = (12, 10), X15 = (11, 3)
-#define LOAD_MSG_AVX_2_6_0_8_12_10_11_3() \
-	VMOVQ_SI_X12(2*8);      \
-	VMOVQ_SI_X13_0;         \
-	VMOVQ_SI_X14(12*8);     \
-	VMOVQ_SI_X15(11*8);     \
-	VPINSRQ_1_SI_X12(6*8);  \
-	VPINSRQ_1_SI_X13(8*8);  \
-	VPINSRQ_1_SI_X14(10*8); \
-	VPINSRQ_1_SI_X15(3*8)
+DATA ·AVX2_iv0<>+0(SB)/8, $0x6a09e667f3bcc908
+DATA ·AVX2_iv0<>+8(SB)/8, $0xbb67ae8584caa73b
+DATA ·AVX2_iv0<>+16(SB)/8, $0x3c6ef372fe94f82b
+DATA ·AVX2_iv0<>+24(SB)/8, $0xa54ff53a5f1d36f1
+GLOBL ·AVX2_iv0<>(SB), RODATA|NOPTR, $32
 
-// load msg: X12 = (0, 6), X13 = (9, 8), X14 = (7, 3), X15 = (2, 11)
-#define LOAD_MSG_AVX_0_6_9_8_7_3_2_11() \
-	MOVQ    0*8(SI), X12;        \
-	VPSHUFD $0x4E, 8*8(SI), X13; \
-	MOVQ    7*8(SI), X14;        \
-	MOVQ    2*8(SI), X15;        \
-	VPINSRQ_1_SI_X12(6*8);       \
-	VPINSRQ_1_SI_X14(3*8);       \
-	VPINSRQ_1_SI_X15(11*8)
-
-// load msg: X12 = (6, 14), X13 = (11, 0), X14 = (15, 9), X15 = (3, 8)
-#define LOAD_MSG_AVX_6_14_11_0_15_9_3_8() \
-	MOVQ 6*8(SI), X12;      \
-	MOVQ 11*8(SI), X13;     \
-	MOVQ 15*8(SI), X14;     \
-	MOVQ 3*8(SI), X15;      \
-	VPINSRQ_1_SI_X12(14*8); \
-	VPINSRQ_1_SI_X13_0;     \
-	VPINSRQ_1_SI_X14(9*8);  \
-	VPINSRQ_1_SI_X15(8*8)
-
-// load msg: X12 = (5, 15), X13 = (8, 2), X14 = (0, 4), X15 = (6, 10)
-#define LOAD_MSG_AVX_5_15_8_2_0_4_6_10() \
-	MOVQ 5*8(SI), X12;      \
-	MOVQ 8*8(SI), X13;      \
-	MOVQ 0*8(SI), X14;      \
-	MOVQ 6*8(SI), X15;      \
-	VPINSRQ_1_SI_X12(15*8); \
-	VPINSRQ_1_SI_X13(2*8);  \
-	VPINSRQ_1_SI_X14(4*8);  \
-	VPINSRQ_1_SI_X15(10*8)
-
-// load msg: X12 = (12, 13), X13 = (1, 10), X14 = (2, 7), X15 = (4, 5)
-#define LOAD_MSG_AVX_12_13_1_10_2_7_4_5() \
-	VMOVDQU 12*8(SI), X12;  \
-	MOVQ    1*8(SI), X13;   \
-	MOVQ    2*8(SI), X14;   \
-	VPINSRQ_1_SI_X13(10*8); \
-	VPINSRQ_1_SI_X14(7*8);  \
-	VMOVDQU 4*8(SI), X15
-
-// load msg: X12 = (15, 9), X13 = (3, 13), X14 = (11, 14), X15 = (12, 0)
-#define LOAD_MSG_AVX_15_9_3_13_11_14_12_0() \
-	MOVQ 15*8(SI), X12;     \
-	MOVQ 3*8(SI), X13;      \
-	MOVQ 11*8(SI), X14;     \
-	MOVQ 12*8(SI), X15;     \
-	VPINSRQ_1_SI_X12(9*8);  \
-	VPINSRQ_1_SI_X13(13*8); \
-	VPINSRQ_1_SI_X14(14*8); \
-	VPINSRQ_1_SI_X15_0
+DATA ·AVX2_iv1<>+0(SB)/8, $0x510e527fade682d1
+DATA ·AVX2_iv1<>+8(SB)/8, $0x9b05688c2b3e6c1f
+DATA ·AVX2_iv1<>+16(SB)/8, $0x1f83d9abfb41bd6b
+DATA ·AVX2_iv1<>+24(SB)/8, $0x5be0cd19137e2179
+GLOBL ·AVX2_iv1<>(SB), RODATA|NOPTR, $32
 
 // func hashBlocksAVX(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte)
-TEXT ·hashBlocksAVX(SB), 4, $288-48 // frame size = 272 + 16 byte alignment
-	MOVQ h+0(FP), AX
-	MOVQ c+8(FP), BX
-	MOVQ flag+16(FP), CX
-	MOVQ blocks_base+24(FP), SI
-	MOVQ blocks_len+32(FP), DI
-
-	MOVQ SP, R10
-	ADDQ $15, R10
-	ANDQ $~15, R10
-
-	VMOVDQU ·AVX_c40<>(SB), X0
-	VMOVDQU ·AVX_c48<>(SB), X1
+// Requires: AVX, SSE2
+TEXT ·hashBlocksAVX(SB), NOSPLIT, $288-48
+	MOVQ    h+0(FP), AX
+	MOVQ    c+8(FP), BX
+	MOVQ    flag+16(FP), CX
+	MOVQ    blocks_base+24(FP), SI
+	MOVQ    blocks_len+32(FP), DI
+	MOVQ    SP, R10
+	ADDQ    $0x0f, R10
+	ANDQ    $-16, R10
+	VMOVDQU ·AVX_c40<>+0(SB), X0
+	VMOVDQU ·AVX_c48<>+0(SB), X1
 	VMOVDQA X0, X8
 	VMOVDQA X1, X9
-
-	VMOVDQU ·AVX_iv3<>(SB), X0
-	VMOVDQA X0, 0(R10)
-	XORQ    CX, 0(R10)          // 0(R10) = ·AVX_iv3 ^ (CX || 0)
-
-	VMOVDQU 0(AX), X10
+	VMOVDQU ·AVX_iv3<>+0(SB), X0
+	VMOVDQA X0, (R10)
+	XORQ    CX, (R10)
+	VMOVDQU (AX), X10
 	VMOVDQU 16(AX), X11
 	VMOVDQU 32(AX), X2
 	VMOVDQU 48(AX), X3
-
-	MOVQ 0(BX), R8
-	MOVQ 8(BX), R9
+	MOVQ    (BX), R8
+	MOVQ    8(BX), R9
 
 loop:
-	ADDQ $128, R8
-	CMPQ R8, $128
+	ADDQ $0x80, R8
+	CMPQ R8, $0x80
 	JGE  noinc
 	INCQ R9
 
 noinc:
-	VMOVQ_R8_X15
-	VPINSRQ_1_R9_X15
-
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0xf9
+	BYTE    $0x6e
+	BYTE    $0xf8
+	BYTE    $0xc4
+	BYTE    $0x43
+	BYTE    $0x81
+	BYTE    $0x22
+	BYTE    $0xf9
+	BYTE    $0x01
 	VMOVDQA X10, X0
 	VMOVDQA X11, X1
-	VMOVDQU ·AVX_iv0<>(SB), X4
-	VMOVDQU ·AVX_iv1<>(SB), X5
-	VMOVDQU ·AVX_iv2<>(SB), X6
-
+	VMOVDQU ·AVX_iv0<>+0(SB), X4
+	VMOVDQU ·AVX_iv1<>+0(SB), X5
+	VMOVDQU ·AVX_iv2<>+0(SB), X6
 	VPXOR   X15, X6, X6
-	VMOVDQA 0(R10), X7
-
-	LOAD_MSG_AVX_0_2_4_6_1_3_5_7()
+	VMOVDQA (R10), X7
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x26
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x6e
+	BYTE    $0x20
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x76
+	BYTE    $0x08
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x7e
+	BYTE    $0x28
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x99
+	BYTE    $0x22
+	BYTE    $0x66
+	BYTE    $0x10
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x91
+	BYTE    $0x22
+	BYTE    $0x6e
+	BYTE    $0x30
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x89
+	BYTE    $0x22
+	BYTE    $0x76
+	BYTE    $0x18
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x81
+	BYTE    $0x22
+	BYTE    $0x7e
+	BYTE    $0x38
+	BYTE    $0x01
 	VMOVDQA X12, 16(R10)
 	VMOVDQA X13, 32(R10)
 	VMOVDQA X14, 48(R10)
 	VMOVDQA X15, 64(R10)
-	HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
-	SHUFFLE_AVX()
-	LOAD_MSG_AVX(8, 10, 12, 14, 9, 11, 13, 15)
+	VPADDQ  X12, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X13, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFD $-79, X6, X6
+	VPSHUFD $-79, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPSHUFB X8, X2, X2
+	VPSHUFB X8, X3, X3
+	VPADDQ  X14, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X15, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFB X9, X6, X6
+	VPSHUFB X9, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPADDQ  X2, X2, X15
+	VPSRLQ  $0x3f, X2, X2
+	VPXOR   X15, X2, X2
+	VPADDQ  X3, X3, X15
+	VPSRLQ  $0x3f, X3, X3
+	VPXOR   X15, X3, X3
+	VMOVDQA X6, X13
+	VMOVDQA X2, X14
+	VMOVDQA X4, X6
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x11
+	BYTE    $0x6c
+	BYTE    $0xfd
+	VMOVDQA X5, X4
+	VMOVDQA X6, X5
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x41
+	BYTE    $0x6d
+	BYTE    $0xf7
+	BYTE    $0xc5
+	BYTE    $0x41
+	BYTE    $0x6c
+	BYTE    $0xff
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x11
+	BYTE    $0x6d
+	BYTE    $0xff
+	BYTE    $0xc5
+	BYTE    $0x61
+	BYTE    $0x6c
+	BYTE    $0xfb
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x69
+	BYTE    $0x6d
+	BYTE    $0xd7
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x09
+	BYTE    $0x6c
+	BYTE    $0xfe
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x61
+	BYTE    $0x6d
+	BYTE    $0xdf
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x66
+	BYTE    $0x40
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x6e
+	BYTE    $0x60
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x76
+	BYTE    $0x48
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x7e
+	BYTE    $0x68
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x99
+	BYTE    $0x22
+	BYTE    $0x66
+	BYTE    $0x50
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x91
+	BYTE    $0x22
+	BYTE    $0x6e
+	BYTE    $0x70
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x89
+	BYTE    $0x22
+	BYTE    $0x76
+	BYTE    $0x58
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x81
+	BYTE    $0x22
+	BYTE    $0x7e
+	BYTE    $0x78
+	BYTE    $0x01
 	VMOVDQA X12, 80(R10)
 	VMOVDQA X13, 96(R10)
 	VMOVDQA X14, 112(R10)
 	VMOVDQA X15, 128(R10)
-	HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
-	SHUFFLE_AVX_INV()
-
-	LOAD_MSG_AVX(14, 4, 9, 13, 10, 8, 15, 6)
+	VPADDQ  X12, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X13, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFD $-79, X6, X6
+	VPSHUFD $-79, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPSHUFB X8, X2, X2
+	VPSHUFB X8, X3, X3
+	VPADDQ  X14, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X15, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFB X9, X6, X6
+	VPSHUFB X9, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPADDQ  X2, X2, X15
+	VPSRLQ  $0x3f, X2, X2
+	VPXOR   X15, X2, X2
+	VPADDQ  X3, X3, X15
+	VPSRLQ  $0x3f, X3, X3
+	VPXOR   X15, X3, X3
+	VMOVDQA X2, X13
+	VMOVDQA X4, X14
+	BYTE    $0xc5
+	BYTE    $0x69
+	BYTE    $0x6c
+	BYTE    $0xfa
+	VMOVDQA X5, X4
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x61
+	BYTE    $0x6d
+	BYTE    $0xd7
+	VMOVDQA X14, X5
+	BYTE    $0xc5
+	BYTE    $0x61
+	BYTE    $0x6c
+	BYTE    $0xfb
+	VMOVDQA X6, X14
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x11
+	BYTE    $0x6d
+	BYTE    $0xdf
+	BYTE    $0xc5
+	BYTE    $0x41
+	BYTE    $0x6c
+	BYTE    $0xff
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x49
+	BYTE    $0x6d
+	BYTE    $0xf7
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x09
+	BYTE    $0x6c
+	BYTE    $0xfe
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x41
+	BYTE    $0x6d
+	BYTE    $0xff
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x66
+	BYTE    $0x70
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x6e
+	BYTE    $0x48
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x76
+	BYTE    $0x50
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x7e
+	BYTE    $0x78
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x99
+	BYTE    $0x22
+	BYTE    $0x66
+	BYTE    $0x20
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x91
+	BYTE    $0x22
+	BYTE    $0x6e
+	BYTE    $0x68
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x89
+	BYTE    $0x22
+	BYTE    $0x76
+	BYTE    $0x40
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x81
+	BYTE    $0x22
+	BYTE    $0x7e
+	BYTE    $0x30
+	BYTE    $0x01
 	VMOVDQA X12, 144(R10)
 	VMOVDQA X13, 160(R10)
 	VMOVDQA X14, 176(R10)
 	VMOVDQA X15, 192(R10)
-	HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
-	SHUFFLE_AVX()
-	LOAD_MSG_AVX_1_0_11_5_12_2_7_3()
+	VPADDQ  X12, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X13, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFD $-79, X6, X6
+	VPSHUFD $-79, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPSHUFB X8, X2, X2
+	VPSHUFB X8, X3, X3
+	VPADDQ  X14, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X15, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFB X9, X6, X6
+	VPSHUFB X9, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPADDQ  X2, X2, X15
+	VPSRLQ  $0x3f, X2, X2
+	VPXOR   X15, X2, X2
+	VPADDQ  X3, X3, X15
+	VPSRLQ  $0x3f, X3, X3
+	VPXOR   X15, X3, X3
+	VMOVDQA X6, X13
+	VMOVDQA X2, X14
+	VMOVDQA X4, X6
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x11
+	BYTE    $0x6c
+	BYTE    $0xfd
+	VMOVDQA X5, X4
+	VMOVDQA X6, X5
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x41
+	BYTE    $0x6d
+	BYTE    $0xf7
+	BYTE    $0xc5
+	BYTE    $0x41
+	BYTE    $0x6c
+	BYTE    $0xff
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x11
+	BYTE    $0x6d
+	BYTE    $0xff
+	BYTE    $0xc5
+	BYTE    $0x61
+	BYTE    $0x6c
+	BYTE    $0xfb
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x69
+	BYTE    $0x6d
+	BYTE    $0xd7
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x09
+	BYTE    $0x6c
+	BYTE    $0xfe
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x61
+	BYTE    $0x6d
+	BYTE    $0xdf
+	VPSHUFD $0x4e, (SI), X12
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x6e
+	BYTE    $0x58
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x76
+	BYTE    $0x60
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x7e
+	BYTE    $0x38
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x91
+	BYTE    $0x22
+	BYTE    $0x6e
+	BYTE    $0x28
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x89
+	BYTE    $0x22
+	BYTE    $0x76
+	BYTE    $0x10
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x81
+	BYTE    $0x22
+	BYTE    $0x7e
+	BYTE    $0x18
+	BYTE    $0x01
 	VMOVDQA X12, 208(R10)
 	VMOVDQA X13, 224(R10)
 	VMOVDQA X14, 240(R10)
 	VMOVDQA X15, 256(R10)
-	HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
-	SHUFFLE_AVX_INV()
-
-	LOAD_MSG_AVX_11_12_5_15_8_0_2_13()
-	HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
-	SHUFFLE_AVX()
-	LOAD_MSG_AVX(10, 3, 7, 9, 14, 6, 1, 4)
-	HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
-	SHUFFLE_AVX_INV()
-
-	LOAD_MSG_AVX(7, 3, 13, 11, 9, 1, 12, 14)
-	HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
-	SHUFFLE_AVX()
-	LOAD_MSG_AVX_2_5_4_15_6_10_0_8()
-	HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
-	SHUFFLE_AVX_INV()
-
-	LOAD_MSG_AVX_9_5_2_10_0_7_4_15()
-	HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
-	SHUFFLE_AVX()
-	LOAD_MSG_AVX(14, 11, 6, 3, 1, 12, 8, 13)
-	HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
-	SHUFFLE_AVX_INV()
-
-	LOAD_MSG_AVX_2_6_0_8_12_10_11_3()
-	HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
-	SHUFFLE_AVX()
-	LOAD_MSG_AVX(4, 7, 15, 1, 13, 5, 14, 9)
-	HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
-	SHUFFLE_AVX_INV()
-
-	LOAD_MSG_AVX(12, 1, 14, 4, 5, 15, 13, 10)
-	HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
-	SHUFFLE_AVX()
-	LOAD_MSG_AVX_0_6_9_8_7_3_2_11()
-	HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
-	SHUFFLE_AVX_INV()
-
-	LOAD_MSG_AVX(13, 7, 12, 3, 11, 14, 1, 9)
-	HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
-	SHUFFLE_AVX()
-	LOAD_MSG_AVX_5_15_8_2_0_4_6_10()
-	HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
-	SHUFFLE_AVX_INV()
-
-	LOAD_MSG_AVX_6_14_11_0_15_9_3_8()
-	HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
-	SHUFFLE_AVX()
-	LOAD_MSG_AVX_12_13_1_10_2_7_4_5()
-	HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
-	SHUFFLE_AVX_INV()
-
-	LOAD_MSG_AVX(10, 8, 7, 1, 2, 4, 6, 5)
-	HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
-	SHUFFLE_AVX()
-	LOAD_MSG_AVX_15_9_3_13_11_14_12_0()
-	HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, X12, X13, X14, X15, X15, X8, X9)
-	SHUFFLE_AVX_INV()
-
-	HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 16(R10), 32(R10), 48(R10), 64(R10), X15, X8, X9)
-	SHUFFLE_AVX()
-	HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 80(R10), 96(R10), 112(R10), 128(R10), X15, X8, X9)
-	SHUFFLE_AVX_INV()
-
-	HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 144(R10), 160(R10), 176(R10), 192(R10), X15, X8, X9)
-	SHUFFLE_AVX()
-	HALF_ROUND_AVX(X0, X1, X2, X3, X4, X5, X6, X7, 208(R10), 224(R10), 240(R10), 256(R10), X15, X8, X9)
-	SHUFFLE_AVX_INV()
-
+	VPADDQ  X12, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X13, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFD $-79, X6, X6
+	VPSHUFD $-79, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPSHUFB X8, X2, X2
+	VPSHUFB X8, X3, X3
+	VPADDQ  X14, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X15, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFB X9, X6, X6
+	VPSHUFB X9, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPADDQ  X2, X2, X15
+	VPSRLQ  $0x3f, X2, X2
+	VPXOR   X15, X2, X2
+	VPADDQ  X3, X3, X15
+	VPSRLQ  $0x3f, X3, X3
+	VPXOR   X15, X3, X3
+	VMOVDQA X2, X13
+	VMOVDQA X4, X14
+	BYTE    $0xc5
+	BYTE    $0x69
+	BYTE    $0x6c
+	BYTE    $0xfa
+	VMOVDQA X5, X4
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x61
+	BYTE    $0x6d
+	BYTE    $0xd7
+	VMOVDQA X14, X5
+	BYTE    $0xc5
+	BYTE    $0x61
+	BYTE    $0x6c
+	BYTE    $0xfb
+	VMOVDQA X6, X14
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x11
+	BYTE    $0x6d
+	BYTE    $0xdf
+	BYTE    $0xc5
+	BYTE    $0x41
+	BYTE    $0x6c
+	BYTE    $0xff
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x49
+	BYTE    $0x6d
+	BYTE    $0xf7
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x09
+	BYTE    $0x6c
+	BYTE    $0xfe
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x41
+	BYTE    $0x6d
+	BYTE    $0xff
+	VMOVDQU 88(SI), X12
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x6e
+	BYTE    $0x28
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x76
+	BYTE    $0x40
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x7e
+	BYTE    $0x10
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x91
+	BYTE    $0x22
+	BYTE    $0x6e
+	BYTE    $0x78
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x89
+	BYTE    $0x22
+	BYTE    $0x36
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x81
+	BYTE    $0x22
+	BYTE    $0x7e
+	BYTE    $0x68
+	BYTE    $0x01
+	VPADDQ  X12, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X13, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFD $-79, X6, X6
+	VPSHUFD $-79, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPSHUFB X8, X2, X2
+	VPSHUFB X8, X3, X3
+	VPADDQ  X14, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X15, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFB X9, X6, X6
+	VPSHUFB X9, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPADDQ  X2, X2, X15
+	VPSRLQ  $0x3f, X2, X2
+	VPXOR   X15, X2, X2
+	VPADDQ  X3, X3, X15
+	VPSRLQ  $0x3f, X3, X3
+	VPXOR   X15, X3, X3
+	VMOVDQA X6, X13
+	VMOVDQA X2, X14
+	VMOVDQA X4, X6
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x11
+	BYTE    $0x6c
+	BYTE    $0xfd
+	VMOVDQA X5, X4
+	VMOVDQA X6, X5
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x41
+	BYTE    $0x6d
+	BYTE    $0xf7
+	BYTE    $0xc5
+	BYTE    $0x41
+	BYTE    $0x6c
+	BYTE    $0xff
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x11
+	BYTE    $0x6d
+	BYTE    $0xff
+	BYTE    $0xc5
+	BYTE    $0x61
+	BYTE    $0x6c
+	BYTE    $0xfb
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x69
+	BYTE    $0x6d
+	BYTE    $0xd7
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x09
+	BYTE    $0x6c
+	BYTE    $0xfe
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x61
+	BYTE    $0x6d
+	BYTE    $0xdf
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x66
+	BYTE    $0x50
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x6e
+	BYTE    $0x38
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x76
+	BYTE    $0x70
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x7e
+	BYTE    $0x08
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x99
+	BYTE    $0x22
+	BYTE    $0x66
+	BYTE    $0x18
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x91
+	BYTE    $0x22
+	BYTE    $0x6e
+	BYTE    $0x48
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x89
+	BYTE    $0x22
+	BYTE    $0x76
+	BYTE    $0x30
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x81
+	BYTE    $0x22
+	BYTE    $0x7e
+	BYTE    $0x20
+	BYTE    $0x01
+	VPADDQ  X12, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X13, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFD $-79, X6, X6
+	VPSHUFD $-79, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPSHUFB X8, X2, X2
+	VPSHUFB X8, X3, X3
+	VPADDQ  X14, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X15, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFB X9, X6, X6
+	VPSHUFB X9, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPADDQ  X2, X2, X15
+	VPSRLQ  $0x3f, X2, X2
+	VPXOR   X15, X2, X2
+	VPADDQ  X3, X3, X15
+	VPSRLQ  $0x3f, X3, X3
+	VPXOR   X15, X3, X3
+	VMOVDQA X2, X13
+	VMOVDQA X4, X14
+	BYTE    $0xc5
+	BYTE    $0x69
+	BYTE    $0x6c
+	BYTE    $0xfa
+	VMOVDQA X5, X4
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x61
+	BYTE    $0x6d
+	BYTE    $0xd7
+	VMOVDQA X14, X5
+	BYTE    $0xc5
+	BYTE    $0x61
+	BYTE    $0x6c
+	BYTE    $0xfb
+	VMOVDQA X6, X14
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x11
+	BYTE    $0x6d
+	BYTE    $0xdf
+	BYTE    $0xc5
+	BYTE    $0x41
+	BYTE    $0x6c
+	BYTE    $0xff
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x49
+	BYTE    $0x6d
+	BYTE    $0xf7
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x09
+	BYTE    $0x6c
+	BYTE    $0xfe
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x41
+	BYTE    $0x6d
+	BYTE    $0xff
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x66
+	BYTE    $0x38
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x6e
+	BYTE    $0x68
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x76
+	BYTE    $0x48
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x7e
+	BYTE    $0x60
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x99
+	BYTE    $0x22
+	BYTE    $0x66
+	BYTE    $0x18
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x91
+	BYTE    $0x22
+	BYTE    $0x6e
+	BYTE    $0x58
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x89
+	BYTE    $0x22
+	BYTE    $0x76
+	BYTE    $0x08
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x81
+	BYTE    $0x22
+	BYTE    $0x7e
+	BYTE    $0x70
+	BYTE    $0x01
+	VPADDQ  X12, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X13, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFD $-79, X6, X6
+	VPSHUFD $-79, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPSHUFB X8, X2, X2
+	VPSHUFB X8, X3, X3
+	VPADDQ  X14, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X15, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFB X9, X6, X6
+	VPSHUFB X9, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPADDQ  X2, X2, X15
+	VPSRLQ  $0x3f, X2, X2
+	VPXOR   X15, X2, X2
+	VPADDQ  X3, X3, X15
+	VPSRLQ  $0x3f, X3, X3
+	VPXOR   X15, X3, X3
+	VMOVDQA X6, X13
+	VMOVDQA X2, X14
+	VMOVDQA X4, X6
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x11
+	BYTE    $0x6c
+	BYTE    $0xfd
+	VMOVDQA X5, X4
+	VMOVDQA X6, X5
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x41
+	BYTE    $0x6d
+	BYTE    $0xf7
+	BYTE    $0xc5
+	BYTE    $0x41
+	BYTE    $0x6c
+	BYTE    $0xff
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x11
+	BYTE    $0x6d
+	BYTE    $0xff
+	BYTE    $0xc5
+	BYTE    $0x61
+	BYTE    $0x6c
+	BYTE    $0xfb
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x69
+	BYTE    $0x6d
+	BYTE    $0xd7
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x09
+	BYTE    $0x6c
+	BYTE    $0xfe
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x61
+	BYTE    $0x6d
+	BYTE    $0xdf
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x66
+	BYTE    $0x10
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x6e
+	BYTE    $0x20
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x76
+	BYTE    $0x30
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x3e
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x99
+	BYTE    $0x22
+	BYTE    $0x66
+	BYTE    $0x28
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x91
+	BYTE    $0x22
+	BYTE    $0x6e
+	BYTE    $0x78
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x89
+	BYTE    $0x22
+	BYTE    $0x76
+	BYTE    $0x50
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x81
+	BYTE    $0x22
+	BYTE    $0x7e
+	BYTE    $0x40
+	BYTE    $0x01
+	VPADDQ  X12, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X13, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFD $-79, X6, X6
+	VPSHUFD $-79, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPSHUFB X8, X2, X2
+	VPSHUFB X8, X3, X3
+	VPADDQ  X14, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X15, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFB X9, X6, X6
+	VPSHUFB X9, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPADDQ  X2, X2, X15
+	VPSRLQ  $0x3f, X2, X2
+	VPXOR   X15, X2, X2
+	VPADDQ  X3, X3, X15
+	VPSRLQ  $0x3f, X3, X3
+	VPXOR   X15, X3, X3
+	VMOVDQA X2, X13
+	VMOVDQA X4, X14
+	BYTE    $0xc5
+	BYTE    $0x69
+	BYTE    $0x6c
+	BYTE    $0xfa
+	VMOVDQA X5, X4
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x61
+	BYTE    $0x6d
+	BYTE    $0xd7
+	VMOVDQA X14, X5
+	BYTE    $0xc5
+	BYTE    $0x61
+	BYTE    $0x6c
+	BYTE    $0xfb
+	VMOVDQA X6, X14
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x11
+	BYTE    $0x6d
+	BYTE    $0xdf
+	BYTE    $0xc5
+	BYTE    $0x41
+	BYTE    $0x6c
+	BYTE    $0xff
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x49
+	BYTE    $0x6d
+	BYTE    $0xf7
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x09
+	BYTE    $0x6c
+	BYTE    $0xfe
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x41
+	BYTE    $0x6d
+	BYTE    $0xff
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x66
+	BYTE    $0x48
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x6e
+	BYTE    $0x10
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x36
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x7e
+	BYTE    $0x20
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x99
+	BYTE    $0x22
+	BYTE    $0x66
+	BYTE    $0x28
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x91
+	BYTE    $0x22
+	BYTE    $0x6e
+	BYTE    $0x50
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x89
+	BYTE    $0x22
+	BYTE    $0x76
+	BYTE    $0x38
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x81
+	BYTE    $0x22
+	BYTE    $0x7e
+	BYTE    $0x78
+	BYTE    $0x01
+	VPADDQ  X12, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X13, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFD $-79, X6, X6
+	VPSHUFD $-79, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPSHUFB X8, X2, X2
+	VPSHUFB X8, X3, X3
+	VPADDQ  X14, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X15, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFB X9, X6, X6
+	VPSHUFB X9, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPADDQ  X2, X2, X15
+	VPSRLQ  $0x3f, X2, X2
+	VPXOR   X15, X2, X2
+	VPADDQ  X3, X3, X15
+	VPSRLQ  $0x3f, X3, X3
+	VPXOR   X15, X3, X3
+	VMOVDQA X6, X13
+	VMOVDQA X2, X14
+	VMOVDQA X4, X6
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x11
+	BYTE    $0x6c
+	BYTE    $0xfd
+	VMOVDQA X5, X4
+	VMOVDQA X6, X5
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x41
+	BYTE    $0x6d
+	BYTE    $0xf7
+	BYTE    $0xc5
+	BYTE    $0x41
+	BYTE    $0x6c
+	BYTE    $0xff
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x11
+	BYTE    $0x6d
+	BYTE    $0xff
+	BYTE    $0xc5
+	BYTE    $0x61
+	BYTE    $0x6c
+	BYTE    $0xfb
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x69
+	BYTE    $0x6d
+	BYTE    $0xd7
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x09
+	BYTE    $0x6c
+	BYTE    $0xfe
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x61
+	BYTE    $0x6d
+	BYTE    $0xdf
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x66
+	BYTE    $0x70
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x6e
+	BYTE    $0x30
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x76
+	BYTE    $0x08
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x7e
+	BYTE    $0x40
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x99
+	BYTE    $0x22
+	BYTE    $0x66
+	BYTE    $0x58
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x91
+	BYTE    $0x22
+	BYTE    $0x6e
+	BYTE    $0x18
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x89
+	BYTE    $0x22
+	BYTE    $0x76
+	BYTE    $0x60
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x81
+	BYTE    $0x22
+	BYTE    $0x7e
+	BYTE    $0x68
+	BYTE    $0x01
+	VPADDQ  X12, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X13, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFD $-79, X6, X6
+	VPSHUFD $-79, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPSHUFB X8, X2, X2
+	VPSHUFB X8, X3, X3
+	VPADDQ  X14, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X15, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFB X9, X6, X6
+	VPSHUFB X9, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPADDQ  X2, X2, X15
+	VPSRLQ  $0x3f, X2, X2
+	VPXOR   X15, X2, X2
+	VPADDQ  X3, X3, X15
+	VPSRLQ  $0x3f, X3, X3
+	VPXOR   X15, X3, X3
+	VMOVDQA X2, X13
+	VMOVDQA X4, X14
+	BYTE    $0xc5
+	BYTE    $0x69
+	BYTE    $0x6c
+	BYTE    $0xfa
+	VMOVDQA X5, X4
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x61
+	BYTE    $0x6d
+	BYTE    $0xd7
+	VMOVDQA X14, X5
+	BYTE    $0xc5
+	BYTE    $0x61
+	BYTE    $0x6c
+	BYTE    $0xfb
+	VMOVDQA X6, X14
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x11
+	BYTE    $0x6d
+	BYTE    $0xdf
+	BYTE    $0xc5
+	BYTE    $0x41
+	BYTE    $0x6c
+	BYTE    $0xff
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x49
+	BYTE    $0x6d
+	BYTE    $0xf7
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x09
+	BYTE    $0x6c
+	BYTE    $0xfe
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x41
+	BYTE    $0x6d
+	BYTE    $0xff
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x66
+	BYTE    $0x10
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x2e
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x76
+	BYTE    $0x60
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x7e
+	BYTE    $0x58
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x99
+	BYTE    $0x22
+	BYTE    $0x66
+	BYTE    $0x30
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x91
+	BYTE    $0x22
+	BYTE    $0x6e
+	BYTE    $0x40
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x89
+	BYTE    $0x22
+	BYTE    $0x76
+	BYTE    $0x50
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x81
+	BYTE    $0x22
+	BYTE    $0x7e
+	BYTE    $0x18
+	BYTE    $0x01
+	VPADDQ  X12, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X13, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFD $-79, X6, X6
+	VPSHUFD $-79, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPSHUFB X8, X2, X2
+	VPSHUFB X8, X3, X3
+	VPADDQ  X14, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X15, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFB X9, X6, X6
+	VPSHUFB X9, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPADDQ  X2, X2, X15
+	VPSRLQ  $0x3f, X2, X2
+	VPXOR   X15, X2, X2
+	VPADDQ  X3, X3, X15
+	VPSRLQ  $0x3f, X3, X3
+	VPXOR   X15, X3, X3
+	VMOVDQA X6, X13
+	VMOVDQA X2, X14
+	VMOVDQA X4, X6
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x11
+	BYTE    $0x6c
+	BYTE    $0xfd
+	VMOVDQA X5, X4
+	VMOVDQA X6, X5
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x41
+	BYTE    $0x6d
+	BYTE    $0xf7
+	BYTE    $0xc5
+	BYTE    $0x41
+	BYTE    $0x6c
+	BYTE    $0xff
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x11
+	BYTE    $0x6d
+	BYTE    $0xff
+	BYTE    $0xc5
+	BYTE    $0x61
+	BYTE    $0x6c
+	BYTE    $0xfb
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x69
+	BYTE    $0x6d
+	BYTE    $0xd7
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x09
+	BYTE    $0x6c
+	BYTE    $0xfe
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x61
+	BYTE    $0x6d
+	BYTE    $0xdf
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x66
+	BYTE    $0x20
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x6e
+	BYTE    $0x78
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x76
+	BYTE    $0x68
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x7e
+	BYTE    $0x70
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x99
+	BYTE    $0x22
+	BYTE    $0x66
+	BYTE    $0x38
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x91
+	BYTE    $0x22
+	BYTE    $0x6e
+	BYTE    $0x08
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x89
+	BYTE    $0x22
+	BYTE    $0x76
+	BYTE    $0x28
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x81
+	BYTE    $0x22
+	BYTE    $0x7e
+	BYTE    $0x48
+	BYTE    $0x01
+	VPADDQ  X12, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X13, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFD $-79, X6, X6
+	VPSHUFD $-79, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPSHUFB X8, X2, X2
+	VPSHUFB X8, X3, X3
+	VPADDQ  X14, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X15, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFB X9, X6, X6
+	VPSHUFB X9, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPADDQ  X2, X2, X15
+	VPSRLQ  $0x3f, X2, X2
+	VPXOR   X15, X2, X2
+	VPADDQ  X3, X3, X15
+	VPSRLQ  $0x3f, X3, X3
+	VPXOR   X15, X3, X3
+	VMOVDQA X2, X13
+	VMOVDQA X4, X14
+	BYTE    $0xc5
+	BYTE    $0x69
+	BYTE    $0x6c
+	BYTE    $0xfa
+	VMOVDQA X5, X4
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x61
+	BYTE    $0x6d
+	BYTE    $0xd7
+	VMOVDQA X14, X5
+	BYTE    $0xc5
+	BYTE    $0x61
+	BYTE    $0x6c
+	BYTE    $0xfb
+	VMOVDQA X6, X14
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x11
+	BYTE    $0x6d
+	BYTE    $0xdf
+	BYTE    $0xc5
+	BYTE    $0x41
+	BYTE    $0x6c
+	BYTE    $0xff
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x49
+	BYTE    $0x6d
+	BYTE    $0xf7
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x09
+	BYTE    $0x6c
+	BYTE    $0xfe
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x41
+	BYTE    $0x6d
+	BYTE    $0xff
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x66
+	BYTE    $0x60
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x6e
+	BYTE    $0x70
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x76
+	BYTE    $0x28
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x7e
+	BYTE    $0x68
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x99
+	BYTE    $0x22
+	BYTE    $0x66
+	BYTE    $0x08
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x91
+	BYTE    $0x22
+	BYTE    $0x6e
+	BYTE    $0x20
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x89
+	BYTE    $0x22
+	BYTE    $0x76
+	BYTE    $0x78
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x81
+	BYTE    $0x22
+	BYTE    $0x7e
+	BYTE    $0x50
+	BYTE    $0x01
+	VPADDQ  X12, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X13, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFD $-79, X6, X6
+	VPSHUFD $-79, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPSHUFB X8, X2, X2
+	VPSHUFB X8, X3, X3
+	VPADDQ  X14, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X15, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFB X9, X6, X6
+	VPSHUFB X9, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPADDQ  X2, X2, X15
+	VPSRLQ  $0x3f, X2, X2
+	VPXOR   X15, X2, X2
+	VPADDQ  X3, X3, X15
+	VPSRLQ  $0x3f, X3, X3
+	VPXOR   X15, X3, X3
+	VMOVDQA X6, X13
+	VMOVDQA X2, X14
+	VMOVDQA X4, X6
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x11
+	BYTE    $0x6c
+	BYTE    $0xfd
+	VMOVDQA X5, X4
+	VMOVDQA X6, X5
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x41
+	BYTE    $0x6d
+	BYTE    $0xf7
+	BYTE    $0xc5
+	BYTE    $0x41
+	BYTE    $0x6c
+	BYTE    $0xff
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x11
+	BYTE    $0x6d
+	BYTE    $0xff
+	BYTE    $0xc5
+	BYTE    $0x61
+	BYTE    $0x6c
+	BYTE    $0xfb
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x69
+	BYTE    $0x6d
+	BYTE    $0xd7
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x09
+	BYTE    $0x6c
+	BYTE    $0xfe
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x61
+	BYTE    $0x6d
+	BYTE    $0xdf
+	MOVQ    (SI), X12
+	VPSHUFD $0x4e, 64(SI), X13
+	MOVQ    56(SI), X14
+	MOVQ    16(SI), X15
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x99
+	BYTE    $0x22
+	BYTE    $0x66
+	BYTE    $0x30
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x89
+	BYTE    $0x22
+	BYTE    $0x76
+	BYTE    $0x18
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x81
+	BYTE    $0x22
+	BYTE    $0x7e
+	BYTE    $0x58
+	BYTE    $0x01
+	VPADDQ  X12, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X13, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFD $-79, X6, X6
+	VPSHUFD $-79, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPSHUFB X8, X2, X2
+	VPSHUFB X8, X3, X3
+	VPADDQ  X14, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X15, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFB X9, X6, X6
+	VPSHUFB X9, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPADDQ  X2, X2, X15
+	VPSRLQ  $0x3f, X2, X2
+	VPXOR   X15, X2, X2
+	VPADDQ  X3, X3, X15
+	VPSRLQ  $0x3f, X3, X3
+	VPXOR   X15, X3, X3
+	VMOVDQA X2, X13
+	VMOVDQA X4, X14
+	BYTE    $0xc5
+	BYTE    $0x69
+	BYTE    $0x6c
+	BYTE    $0xfa
+	VMOVDQA X5, X4
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x61
+	BYTE    $0x6d
+	BYTE    $0xd7
+	VMOVDQA X14, X5
+	BYTE    $0xc5
+	BYTE    $0x61
+	BYTE    $0x6c
+	BYTE    $0xfb
+	VMOVDQA X6, X14
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x11
+	BYTE    $0x6d
+	BYTE    $0xdf
+	BYTE    $0xc5
+	BYTE    $0x41
+	BYTE    $0x6c
+	BYTE    $0xff
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x49
+	BYTE    $0x6d
+	BYTE    $0xf7
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x09
+	BYTE    $0x6c
+	BYTE    $0xfe
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x41
+	BYTE    $0x6d
+	BYTE    $0xff
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x66
+	BYTE    $0x68
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x6e
+	BYTE    $0x60
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x76
+	BYTE    $0x58
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x7e
+	BYTE    $0x08
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x99
+	BYTE    $0x22
+	BYTE    $0x66
+	BYTE    $0x38
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x91
+	BYTE    $0x22
+	BYTE    $0x6e
+	BYTE    $0x18
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x89
+	BYTE    $0x22
+	BYTE    $0x76
+	BYTE    $0x70
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x81
+	BYTE    $0x22
+	BYTE    $0x7e
+	BYTE    $0x48
+	BYTE    $0x01
+	VPADDQ  X12, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X13, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFD $-79, X6, X6
+	VPSHUFD $-79, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPSHUFB X8, X2, X2
+	VPSHUFB X8, X3, X3
+	VPADDQ  X14, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X15, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFB X9, X6, X6
+	VPSHUFB X9, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPADDQ  X2, X2, X15
+	VPSRLQ  $0x3f, X2, X2
+	VPXOR   X15, X2, X2
+	VPADDQ  X3, X3, X15
+	VPSRLQ  $0x3f, X3, X3
+	VPXOR   X15, X3, X3
+	VMOVDQA X6, X13
+	VMOVDQA X2, X14
+	VMOVDQA X4, X6
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x11
+	BYTE    $0x6c
+	BYTE    $0xfd
+	VMOVDQA X5, X4
+	VMOVDQA X6, X5
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x41
+	BYTE    $0x6d
+	BYTE    $0xf7
+	BYTE    $0xc5
+	BYTE    $0x41
+	BYTE    $0x6c
+	BYTE    $0xff
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x11
+	BYTE    $0x6d
+	BYTE    $0xff
+	BYTE    $0xc5
+	BYTE    $0x61
+	BYTE    $0x6c
+	BYTE    $0xfb
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x69
+	BYTE    $0x6d
+	BYTE    $0xd7
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x09
+	BYTE    $0x6c
+	BYTE    $0xfe
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x61
+	BYTE    $0x6d
+	BYTE    $0xdf
+	MOVQ    40(SI), X12
+	MOVQ    64(SI), X13
+	MOVQ    (SI), X14
+	MOVQ    48(SI), X15
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x99
+	BYTE    $0x22
+	BYTE    $0x66
+	BYTE    $0x78
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x91
+	BYTE    $0x22
+	BYTE    $0x6e
+	BYTE    $0x10
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x89
+	BYTE    $0x22
+	BYTE    $0x76
+	BYTE    $0x20
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x81
+	BYTE    $0x22
+	BYTE    $0x7e
+	BYTE    $0x50
+	BYTE    $0x01
+	VPADDQ  X12, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X13, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFD $-79, X6, X6
+	VPSHUFD $-79, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPSHUFB X8, X2, X2
+	VPSHUFB X8, X3, X3
+	VPADDQ  X14, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X15, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFB X9, X6, X6
+	VPSHUFB X9, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPADDQ  X2, X2, X15
+	VPSRLQ  $0x3f, X2, X2
+	VPXOR   X15, X2, X2
+	VPADDQ  X3, X3, X15
+	VPSRLQ  $0x3f, X3, X3
+	VPXOR   X15, X3, X3
+	VMOVDQA X2, X13
+	VMOVDQA X4, X14
+	BYTE    $0xc5
+	BYTE    $0x69
+	BYTE    $0x6c
+	BYTE    $0xfa
+	VMOVDQA X5, X4
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x61
+	BYTE    $0x6d
+	BYTE    $0xd7
+	VMOVDQA X14, X5
+	BYTE    $0xc5
+	BYTE    $0x61
+	BYTE    $0x6c
+	BYTE    $0xfb
+	VMOVDQA X6, X14
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x11
+	BYTE    $0x6d
+	BYTE    $0xdf
+	BYTE    $0xc5
+	BYTE    $0x41
+	BYTE    $0x6c
+	BYTE    $0xff
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x49
+	BYTE    $0x6d
+	BYTE    $0xf7
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x09
+	BYTE    $0x6c
+	BYTE    $0xfe
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x41
+	BYTE    $0x6d
+	BYTE    $0xff
+	MOVQ    48(SI), X12
+	MOVQ    88(SI), X13
+	MOVQ    120(SI), X14
+	MOVQ    24(SI), X15
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x99
+	BYTE    $0x22
+	BYTE    $0x66
+	BYTE    $0x70
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x91
+	BYTE    $0x22
+	BYTE    $0x2e
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x89
+	BYTE    $0x22
+	BYTE    $0x76
+	BYTE    $0x48
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x81
+	BYTE    $0x22
+	BYTE    $0x7e
+	BYTE    $0x40
+	BYTE    $0x01
+	VPADDQ  X12, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X13, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFD $-79, X6, X6
+	VPSHUFD $-79, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPSHUFB X8, X2, X2
+	VPSHUFB X8, X3, X3
+	VPADDQ  X14, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X15, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFB X9, X6, X6
+	VPSHUFB X9, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPADDQ  X2, X2, X15
+	VPSRLQ  $0x3f, X2, X2
+	VPXOR   X15, X2, X2
+	VPADDQ  X3, X3, X15
+	VPSRLQ  $0x3f, X3, X3
+	VPXOR   X15, X3, X3
+	VMOVDQA X6, X13
+	VMOVDQA X2, X14
+	VMOVDQA X4, X6
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x11
+	BYTE    $0x6c
+	BYTE    $0xfd
+	VMOVDQA X5, X4
+	VMOVDQA X6, X5
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x41
+	BYTE    $0x6d
+	BYTE    $0xf7
+	BYTE    $0xc5
+	BYTE    $0x41
+	BYTE    $0x6c
+	BYTE    $0xff
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x11
+	BYTE    $0x6d
+	BYTE    $0xff
+	BYTE    $0xc5
+	BYTE    $0x61
+	BYTE    $0x6c
+	BYTE    $0xfb
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x69
+	BYTE    $0x6d
+	BYTE    $0xd7
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x09
+	BYTE    $0x6c
+	BYTE    $0xfe
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x61
+	BYTE    $0x6d
+	BYTE    $0xdf
+	VMOVDQU 96(SI), X12
+	MOVQ    8(SI), X13
+	MOVQ    16(SI), X14
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x91
+	BYTE    $0x22
+	BYTE    $0x6e
+	BYTE    $0x50
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x89
+	BYTE    $0x22
+	BYTE    $0x76
+	BYTE    $0x38
+	BYTE    $0x01
+	VMOVDQU 32(SI), X15
+	VPADDQ  X12, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X13, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFD $-79, X6, X6
+	VPSHUFD $-79, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPSHUFB X8, X2, X2
+	VPSHUFB X8, X3, X3
+	VPADDQ  X14, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X15, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFB X9, X6, X6
+	VPSHUFB X9, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPADDQ  X2, X2, X15
+	VPSRLQ  $0x3f, X2, X2
+	VPXOR   X15, X2, X2
+	VPADDQ  X3, X3, X15
+	VPSRLQ  $0x3f, X3, X3
+	VPXOR   X15, X3, X3
+	VMOVDQA X2, X13
+	VMOVDQA X4, X14
+	BYTE    $0xc5
+	BYTE    $0x69
+	BYTE    $0x6c
+	BYTE    $0xfa
+	VMOVDQA X5, X4
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x61
+	BYTE    $0x6d
+	BYTE    $0xd7
+	VMOVDQA X14, X5
+	BYTE    $0xc5
+	BYTE    $0x61
+	BYTE    $0x6c
+	BYTE    $0xfb
+	VMOVDQA X6, X14
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x11
+	BYTE    $0x6d
+	BYTE    $0xdf
+	BYTE    $0xc5
+	BYTE    $0x41
+	BYTE    $0x6c
+	BYTE    $0xff
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x49
+	BYTE    $0x6d
+	BYTE    $0xf7
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x09
+	BYTE    $0x6c
+	BYTE    $0xfe
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x41
+	BYTE    $0x6d
+	BYTE    $0xff
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x66
+	BYTE    $0x50
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x6e
+	BYTE    $0x38
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x76
+	BYTE    $0x10
+	BYTE    $0xc5
+	BYTE    $0x7a
+	BYTE    $0x7e
+	BYTE    $0x7e
+	BYTE    $0x30
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x99
+	BYTE    $0x22
+	BYTE    $0x66
+	BYTE    $0x40
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x91
+	BYTE    $0x22
+	BYTE    $0x6e
+	BYTE    $0x08
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x89
+	BYTE    $0x22
+	BYTE    $0x76
+	BYTE    $0x20
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x81
+	BYTE    $0x22
+	BYTE    $0x7e
+	BYTE    $0x28
+	BYTE    $0x01
+	VPADDQ  X12, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X13, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFD $-79, X6, X6
+	VPSHUFD $-79, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPSHUFB X8, X2, X2
+	VPSHUFB X8, X3, X3
+	VPADDQ  X14, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X15, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFB X9, X6, X6
+	VPSHUFB X9, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPADDQ  X2, X2, X15
+	VPSRLQ  $0x3f, X2, X2
+	VPXOR   X15, X2, X2
+	VPADDQ  X3, X3, X15
+	VPSRLQ  $0x3f, X3, X3
+	VPXOR   X15, X3, X3
+	VMOVDQA X6, X13
+	VMOVDQA X2, X14
+	VMOVDQA X4, X6
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x11
+	BYTE    $0x6c
+	BYTE    $0xfd
+	VMOVDQA X5, X4
+	VMOVDQA X6, X5
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x41
+	BYTE    $0x6d
+	BYTE    $0xf7
+	BYTE    $0xc5
+	BYTE    $0x41
+	BYTE    $0x6c
+	BYTE    $0xff
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x11
+	BYTE    $0x6d
+	BYTE    $0xff
+	BYTE    $0xc5
+	BYTE    $0x61
+	BYTE    $0x6c
+	BYTE    $0xfb
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x69
+	BYTE    $0x6d
+	BYTE    $0xd7
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x09
+	BYTE    $0x6c
+	BYTE    $0xfe
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x61
+	BYTE    $0x6d
+	BYTE    $0xdf
+	MOVQ    120(SI), X12
+	MOVQ    24(SI), X13
+	MOVQ    88(SI), X14
+	MOVQ    96(SI), X15
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x99
+	BYTE    $0x22
+	BYTE    $0x66
+	BYTE    $0x48
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x91
+	BYTE    $0x22
+	BYTE    $0x6e
+	BYTE    $0x68
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x89
+	BYTE    $0x22
+	BYTE    $0x76
+	BYTE    $0x70
+	BYTE    $0x01
+	BYTE    $0xc4
+	BYTE    $0x63
+	BYTE    $0x81
+	BYTE    $0x22
+	BYTE    $0x3e
+	BYTE    $0x01
+	VPADDQ  X12, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X13, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFD $-79, X6, X6
+	VPSHUFD $-79, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPSHUFB X8, X2, X2
+	VPSHUFB X8, X3, X3
+	VPADDQ  X14, X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  X15, X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFB X9, X6, X6
+	VPSHUFB X9, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPADDQ  X2, X2, X15
+	VPSRLQ  $0x3f, X2, X2
+	VPXOR   X15, X2, X2
+	VPADDQ  X3, X3, X15
+	VPSRLQ  $0x3f, X3, X3
+	VPXOR   X15, X3, X3
+	VMOVDQA X2, X13
+	VMOVDQA X4, X14
+	BYTE    $0xc5
+	BYTE    $0x69
+	BYTE    $0x6c
+	BYTE    $0xfa
+	VMOVDQA X5, X4
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x61
+	BYTE    $0x6d
+	BYTE    $0xd7
+	VMOVDQA X14, X5
+	BYTE    $0xc5
+	BYTE    $0x61
+	BYTE    $0x6c
+	BYTE    $0xfb
+	VMOVDQA X6, X14
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x11
+	BYTE    $0x6d
+	BYTE    $0xdf
+	BYTE    $0xc5
+	BYTE    $0x41
+	BYTE    $0x6c
+	BYTE    $0xff
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x49
+	BYTE    $0x6d
+	BYTE    $0xf7
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x09
+	BYTE    $0x6c
+	BYTE    $0xfe
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x41
+	BYTE    $0x6d
+	BYTE    $0xff
+	VPADDQ  16(R10), X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  32(R10), X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFD $-79, X6, X6
+	VPSHUFD $-79, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPSHUFB X8, X2, X2
+	VPSHUFB X8, X3, X3
+	VPADDQ  48(R10), X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  64(R10), X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFB X9, X6, X6
+	VPSHUFB X9, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPADDQ  X2, X2, X15
+	VPSRLQ  $0x3f, X2, X2
+	VPXOR   X15, X2, X2
+	VPADDQ  X3, X3, X15
+	VPSRLQ  $0x3f, X3, X3
+	VPXOR   X15, X3, X3
+	VMOVDQA X6, X13
+	VMOVDQA X2, X14
+	VMOVDQA X4, X6
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x11
+	BYTE    $0x6c
+	BYTE    $0xfd
+	VMOVDQA X5, X4
+	VMOVDQA X6, X5
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x41
+	BYTE    $0x6d
+	BYTE    $0xf7
+	BYTE    $0xc5
+	BYTE    $0x41
+	BYTE    $0x6c
+	BYTE    $0xff
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x11
+	BYTE    $0x6d
+	BYTE    $0xff
+	BYTE    $0xc5
+	BYTE    $0x61
+	BYTE    $0x6c
+	BYTE    $0xfb
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x69
+	BYTE    $0x6d
+	BYTE    $0xd7
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x09
+	BYTE    $0x6c
+	BYTE    $0xfe
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x61
+	BYTE    $0x6d
+	BYTE    $0xdf
+	VPADDQ  80(R10), X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  96(R10), X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFD $-79, X6, X6
+	VPSHUFD $-79, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPSHUFB X8, X2, X2
+	VPSHUFB X8, X3, X3
+	VPADDQ  112(R10), X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  128(R10), X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFB X9, X6, X6
+	VPSHUFB X9, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPADDQ  X2, X2, X15
+	VPSRLQ  $0x3f, X2, X2
+	VPXOR   X15, X2, X2
+	VPADDQ  X3, X3, X15
+	VPSRLQ  $0x3f, X3, X3
+	VPXOR   X15, X3, X3
+	VMOVDQA X2, X13
+	VMOVDQA X4, X14
+	BYTE    $0xc5
+	BYTE    $0x69
+	BYTE    $0x6c
+	BYTE    $0xfa
+	VMOVDQA X5, X4
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x61
+	BYTE    $0x6d
+	BYTE    $0xd7
+	VMOVDQA X14, X5
+	BYTE    $0xc5
+	BYTE    $0x61
+	BYTE    $0x6c
+	BYTE    $0xfb
+	VMOVDQA X6, X14
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x11
+	BYTE    $0x6d
+	BYTE    $0xdf
+	BYTE    $0xc5
+	BYTE    $0x41
+	BYTE    $0x6c
+	BYTE    $0xff
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x49
+	BYTE    $0x6d
+	BYTE    $0xf7
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x09
+	BYTE    $0x6c
+	BYTE    $0xfe
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x41
+	BYTE    $0x6d
+	BYTE    $0xff
+	VPADDQ  144(R10), X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  160(R10), X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFD $-79, X6, X6
+	VPSHUFD $-79, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPSHUFB X8, X2, X2
+	VPSHUFB X8, X3, X3
+	VPADDQ  176(R10), X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  192(R10), X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFB X9, X6, X6
+	VPSHUFB X9, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPADDQ  X2, X2, X15
+	VPSRLQ  $0x3f, X2, X2
+	VPXOR   X15, X2, X2
+	VPADDQ  X3, X3, X15
+	VPSRLQ  $0x3f, X3, X3
+	VPXOR   X15, X3, X3
+	VMOVDQA X6, X13
+	VMOVDQA X2, X14
+	VMOVDQA X4, X6
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x11
+	BYTE    $0x6c
+	BYTE    $0xfd
+	VMOVDQA X5, X4
+	VMOVDQA X6, X5
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x41
+	BYTE    $0x6d
+	BYTE    $0xf7
+	BYTE    $0xc5
+	BYTE    $0x41
+	BYTE    $0x6c
+	BYTE    $0xff
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x11
+	BYTE    $0x6d
+	BYTE    $0xff
+	BYTE    $0xc5
+	BYTE    $0x61
+	BYTE    $0x6c
+	BYTE    $0xfb
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x69
+	BYTE    $0x6d
+	BYTE    $0xd7
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x09
+	BYTE    $0x6c
+	BYTE    $0xfe
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x61
+	BYTE    $0x6d
+	BYTE    $0xdf
+	VPADDQ  208(R10), X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  224(R10), X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFD $-79, X6, X6
+	VPSHUFD $-79, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPSHUFB X8, X2, X2
+	VPSHUFB X8, X3, X3
+	VPADDQ  240(R10), X0, X0
+	VPADDQ  X2, X0, X0
+	VPADDQ  256(R10), X1, X1
+	VPADDQ  X3, X1, X1
+	VPXOR   X0, X6, X6
+	VPXOR   X1, X7, X7
+	VPSHUFB X9, X6, X6
+	VPSHUFB X9, X7, X7
+	VPADDQ  X6, X4, X4
+	VPADDQ  X7, X5, X5
+	VPXOR   X4, X2, X2
+	VPXOR   X5, X3, X3
+	VPADDQ  X2, X2, X15
+	VPSRLQ  $0x3f, X2, X2
+	VPXOR   X15, X2, X2
+	VPADDQ  X3, X3, X15
+	VPSRLQ  $0x3f, X3, X3
+	VPXOR   X15, X3, X3
+	VMOVDQA X2, X13
+	VMOVDQA X4, X14
+	BYTE    $0xc5
+	BYTE    $0x69
+	BYTE    $0x6c
+	BYTE    $0xfa
+	VMOVDQA X5, X4
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x61
+	BYTE    $0x6d
+	BYTE    $0xd7
+	VMOVDQA X14, X5
+	BYTE    $0xc5
+	BYTE    $0x61
+	BYTE    $0x6c
+	BYTE    $0xfb
+	VMOVDQA X6, X14
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x11
+	BYTE    $0x6d
+	BYTE    $0xdf
+	BYTE    $0xc5
+	BYTE    $0x41
+	BYTE    $0x6c
+	BYTE    $0xff
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x49
+	BYTE    $0x6d
+	BYTE    $0xf7
+	BYTE    $0xc4
+	BYTE    $0x41
+	BYTE    $0x09
+	BYTE    $0x6c
+	BYTE    $0xfe
+	BYTE    $0xc4
+	BYTE    $0xc1
+	BYTE    $0x41
+	BYTE    $0x6d
+	BYTE    $0xff
 	VMOVDQU 32(AX), X14
 	VMOVDQU 48(AX), X15
 	VPXOR   X0, X10, X10
@@ -729,16 +4524,36 @@ noinc:
 	VPXOR   X7, X15, X3
 	VMOVDQU X2, 32(AX)
 	VMOVDQU X3, 48(AX)
+	LEAQ    128(SI), SI
+	SUBQ    $0x80, DI
+	JNE     loop
+	VMOVDQU X10, (AX)
+	VMOVDQU X11, 16(AX)
+	MOVQ    R8, (BX)
+	MOVQ    R9, 8(BX)
+	VZEROUPPER
+	RET
 
-	LEAQ 128(SI), SI
-	SUBQ $128, DI
-	JNE  loop
+DATA ·AVX_c40<>+0(SB)/8, $0x0201000706050403
+DATA ·AVX_c40<>+8(SB)/8, $0x0a09080f0e0d0c0b
+GLOBL ·AVX_c40<>(SB), RODATA|NOPTR, $16
 
-	VMOVDQU X10, 0(AX)
-	VMOVDQU X11, 16(AX)
+DATA ·AVX_c48<>+0(SB)/8, $0x0100070605040302
+DATA ·AVX_c48<>+8(SB)/8, $0x09080f0e0d0c0b0a
+GLOBL ·AVX_c48<>(SB), RODATA|NOPTR, $16
 
-	MOVQ R8, 0(BX)
-	MOVQ R9, 8(BX)
-	VZEROUPPER
+DATA ·AVX_iv3<>+0(SB)/8, $0x1f83d9abfb41bd6b
+DATA ·AVX_iv3<>+8(SB)/8, $0x5be0cd19137e2179
+GLOBL ·AVX_iv3<>(SB), RODATA|NOPTR, $16
 
-	RET
+DATA ·AVX_iv0<>+0(SB)/8, $0x6a09e667f3bcc908
+DATA ·AVX_iv0<>+8(SB)/8, $0xbb67ae8584caa73b
+GLOBL ·AVX_iv0<>(SB), RODATA|NOPTR, $16
+
+DATA ·AVX_iv1<>+0(SB)/8, $0x3c6ef372fe94f82b
+DATA ·AVX_iv1<>+8(SB)/8, $0xa54ff53a5f1d36f1
+GLOBL ·AVX_iv1<>(SB), RODATA|NOPTR, $16
+
+DATA ·AVX_iv2<>+0(SB)/8, $0x510e527fade682d1
+DATA ·AVX_iv2<>+8(SB)/8, $0x9b05688c2b3e6c1f
+GLOBL ·AVX_iv2<>(SB), RODATA|NOPTR, $16
diff --git a/vendor/golang.org/x/crypto/blake2b/blake2b_amd64.s b/vendor/golang.org/x/crypto/blake2b/blake2b_amd64.s
index adfac00c..9a0ce212 100644
--- a/vendor/golang.org/x/crypto/blake2b/blake2b_amd64.s
+++ b/vendor/golang.org/x/crypto/blake2b/blake2b_amd64.s
@@ -1,278 +1,1441 @@
-// Copyright 2016 The Go Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style
-// license that can be found in the LICENSE file.
+// Code generated by command: go run blake2b_amd64_asm.go -out ../../blake2b_amd64.s -pkg blake2b. DO NOT EDIT.
 
 //go:build amd64 && gc && !purego
 
 #include "textflag.h"
 
-DATA ·iv0<>+0x00(SB)/8, $0x6a09e667f3bcc908
-DATA ·iv0<>+0x08(SB)/8, $0xbb67ae8584caa73b
-GLOBL ·iv0<>(SB), (NOPTR+RODATA), $16
-
-DATA ·iv1<>+0x00(SB)/8, $0x3c6ef372fe94f82b
-DATA ·iv1<>+0x08(SB)/8, $0xa54ff53a5f1d36f1
-GLOBL ·iv1<>(SB), (NOPTR+RODATA), $16
-
-DATA ·iv2<>+0x00(SB)/8, $0x510e527fade682d1
-DATA ·iv2<>+0x08(SB)/8, $0x9b05688c2b3e6c1f
-GLOBL ·iv2<>(SB), (NOPTR+RODATA), $16
-
-DATA ·iv3<>+0x00(SB)/8, $0x1f83d9abfb41bd6b
-DATA ·iv3<>+0x08(SB)/8, $0x5be0cd19137e2179
-GLOBL ·iv3<>(SB), (NOPTR+RODATA), $16
-
-DATA ·c40<>+0x00(SB)/8, $0x0201000706050403
-DATA ·c40<>+0x08(SB)/8, $0x0a09080f0e0d0c0b
-GLOBL ·c40<>(SB), (NOPTR+RODATA), $16
-
-DATA ·c48<>+0x00(SB)/8, $0x0100070605040302
-DATA ·c48<>+0x08(SB)/8, $0x09080f0e0d0c0b0a
-GLOBL ·c48<>(SB), (NOPTR+RODATA), $16
-
-#define SHUFFLE(v2, v3, v4, v5, v6, v7, t1, t2) \
-	MOVO       v4, t1; \
-	MOVO       v5, v4; \
-	MOVO       t1, v5; \
-	MOVO       v6, t1; \
-	PUNPCKLQDQ v6, t2; \
-	PUNPCKHQDQ v7, v6; \
-	PUNPCKHQDQ t2, v6; \
-	PUNPCKLQDQ v7, t2; \
-	MOVO       t1, v7; \
-	MOVO       v2, t1; \
-	PUNPCKHQDQ t2, v7; \
-	PUNPCKLQDQ v3, t2; \
-	PUNPCKHQDQ t2, v2; \
-	PUNPCKLQDQ t1, t2; \
-	PUNPCKHQDQ t2, v3
-
-#define SHUFFLE_INV(v2, v3, v4, v5, v6, v7, t1, t2) \
-	MOVO       v4, t1; \
-	MOVO       v5, v4; \
-	MOVO       t1, v5; \
-	MOVO       v2, t1; \
-	PUNPCKLQDQ v2, t2; \
-	PUNPCKHQDQ v3, v2; \
-	PUNPCKHQDQ t2, v2; \
-	PUNPCKLQDQ v3, t2; \
-	MOVO       t1, v3; \
-	MOVO       v6, t1; \
-	PUNPCKHQDQ t2, v3; \
-	PUNPCKLQDQ v7, t2; \
-	PUNPCKHQDQ t2, v6; \
-	PUNPCKLQDQ t1, t2; \
-	PUNPCKHQDQ t2, v7
-
-#define HALF_ROUND(v0, v1, v2, v3, v4, v5, v6, v7, m0, m1, m2, m3, t0, c40, c48) \
-	PADDQ  m0, v0;        \
-	PADDQ  m1, v1;        \
-	PADDQ  v2, v0;        \
-	PADDQ  v3, v1;        \
-	PXOR   v0, v6;        \
-	PXOR   v1, v7;        \
-	PSHUFD $0xB1, v6, v6; \
-	PSHUFD $0xB1, v7, v7; \
-	PADDQ  v6, v4;        \
-	PADDQ  v7, v5;        \
-	PXOR   v4, v2;        \
-	PXOR   v5, v3;        \
-	PSHUFB c40, v2;       \
-	PSHUFB c40, v3;       \
-	PADDQ  m2, v0;        \
-	PADDQ  m3, v1;        \
-	PADDQ  v2, v0;        \
-	PADDQ  v3, v1;        \
-	PXOR   v0, v6;        \
-	PXOR   v1, v7;        \
-	PSHUFB c48, v6;       \
-	PSHUFB c48, v7;       \
-	PADDQ  v6, v4;        \
-	PADDQ  v7, v5;        \
-	PXOR   v4, v2;        \
-	PXOR   v5, v3;        \
-	MOVOU  v2, t0;        \
-	PADDQ  v2, t0;        \
-	PSRLQ  $63, v2;       \
-	PXOR   t0, v2;        \
-	MOVOU  v3, t0;        \
-	PADDQ  v3, t0;        \
-	PSRLQ  $63, v3;       \
-	PXOR   t0, v3
-
-#define LOAD_MSG(m0, m1, m2, m3, src, i0, i1, i2, i3, i4, i5, i6, i7) \
-	MOVQ   i0*8(src), m0;     \
-	PINSRQ $1, i1*8(src), m0; \
-	MOVQ   i2*8(src), m1;     \
-	PINSRQ $1, i3*8(src), m1; \
-	MOVQ   i4*8(src), m2;     \
-	PINSRQ $1, i5*8(src), m2; \
-	MOVQ   i6*8(src), m3;     \
-	PINSRQ $1, i7*8(src), m3
-
 // func hashBlocksSSE4(h *[8]uint64, c *[2]uint64, flag uint64, blocks []byte)
-TEXT ·hashBlocksSSE4(SB), 4, $288-48 // frame size = 272 + 16 byte alignment
-	MOVQ h+0(FP), AX
-	MOVQ c+8(FP), BX
-	MOVQ flag+16(FP), CX
-	MOVQ blocks_base+24(FP), SI
-	MOVQ blocks_len+32(FP), DI
-
-	MOVQ SP, R10
-	ADDQ $15, R10
-	ANDQ $~15, R10
-
-	MOVOU ·iv3<>(SB), X0
-	MOVO  X0, 0(R10)
-	XORQ  CX, 0(R10)     // 0(R10) = ·iv3 ^ (CX || 0)
-
-	MOVOU ·c40<>(SB), X13
-	MOVOU ·c48<>(SB), X14
-
-	MOVOU 0(AX), X12
+// Requires: SSE2, SSE4.1, SSSE3
+TEXT ·hashBlocksSSE4(SB), NOSPLIT, $288-48
+	MOVQ  h+0(FP), AX
+	MOVQ  c+8(FP), BX
+	MOVQ  flag+16(FP), CX
+	MOVQ  blocks_base+24(FP), SI
+	MOVQ  blocks_len+32(FP), DI
+	MOVQ  SP, R10
+	ADDQ  $0x0f, R10
+	ANDQ  $-16, R10
+	MOVOU ·iv3<>+0(SB), X0
+	MOVO  X0, (R10)
+	XORQ  CX, (R10)
+	MOVOU ·c40<>+0(SB), X13
+	MOVOU ·c48<>+0(SB), X14
+	MOVOU (AX), X12
 	MOVOU 16(AX), X15
-
-	MOVQ 0(BX), R8
-	MOVQ 8(BX), R9
+	MOVQ  (BX), R8
+	MOVQ  8(BX), R9
 
 loop:
-	ADDQ $128, R8
-	CMPQ R8, $128
+	ADDQ $0x80, R8
+	CMPQ R8, $0x80
 	JGE  noinc
 	INCQ R9
 
 noinc:
-	MOVQ R8, X8
-	PINSRQ $1, R9, X8
-
-	MOVO X12, X0
-	MOVO X15, X1
-	MOVOU 32(AX), X2
-	MOVOU 48(AX), X3
-	MOVOU ·iv0<>(SB), X4
-	MOVOU ·iv1<>(SB), X5
-	MOVOU ·iv2<>(SB), X6
-
-	PXOR X8, X6
-	MOVO 0(R10), X7
-
-	LOAD_MSG(X8, X9, X10, X11, SI, 0, 2, 4, 6, 1, 3, 5, 7)
-	MOVO X8, 16(R10)
-	MOVO X9, 32(R10)
-	MOVO X10, 48(R10)
-	MOVO X11, 64(R10)
-	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
-	SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
-	LOAD_MSG(X8, X9, X10, X11, SI, 8, 10, 12, 14, 9, 11, 13, 15)
-	MOVO X8, 80(R10)
-	MOVO X9, 96(R10)
-	MOVO X10, 112(R10)
-	MOVO X11, 128(R10)
-	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
-	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
-
-	LOAD_MSG(X8, X9, X10, X11, SI, 14, 4, 9, 13, 10, 8, 15, 6)
-	MOVO X8, 144(R10)
-	MOVO X9, 160(R10)
-	MOVO X10, 176(R10)
-	MOVO X11, 192(R10)
-	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
-	SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
-	LOAD_MSG(X8, X9, X10, X11, SI, 1, 0, 11, 5, 12, 2, 7, 3)
-	MOVO X8, 208(R10)
-	MOVO X9, 224(R10)
-	MOVO X10, 240(R10)
-	MOVO X11, 256(R10)
-	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
-	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
-
-	LOAD_MSG(X8, X9, X10, X11, SI, 11, 12, 5, 15, 8, 0, 2, 13)
-	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
-	SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
-	LOAD_MSG(X8, X9, X10, X11, SI, 10, 3, 7, 9, 14, 6, 1, 4)
-	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
-	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
-
-	LOAD_MSG(X8, X9, X10, X11, SI, 7, 3, 13, 11, 9, 1, 12, 14)
-	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
-	SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
-	LOAD_MSG(X8, X9, X10, X11, SI, 2, 5, 4, 15, 6, 10, 0, 8)
-	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
-	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
-
-	LOAD_MSG(X8, X9, X10, X11, SI, 9, 5, 2, 10, 0, 7, 4, 15)
-	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
-	SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
-	LOAD_MSG(X8, X9, X10, X11, SI, 14, 11, 6, 3, 1, 12, 8, 13)
-	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
-	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
-
-	LOAD_MSG(X8, X9, X10, X11, SI, 2, 6, 0, 8, 12, 10, 11, 3)
-	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
-	SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
-	LOAD_MSG(X8, X9, X10, X11, SI, 4, 7, 15, 1, 13, 5, 14, 9)
-	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
-	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
-
-	LOAD_MSG(X8, X9, X10, X11, SI, 12, 1, 14, 4, 5, 15, 13, 10)
-	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
-	SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
-	LOAD_MSG(X8, X9, X10, X11, SI, 0, 6, 9, 8, 7, 3, 2, 11)
-	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
-	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
-
-	LOAD_MSG(X8, X9, X10, X11, SI, 13, 7, 12, 3, 11, 14, 1, 9)
-	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
-	SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
-	LOAD_MSG(X8, X9, X10, X11, SI, 5, 15, 8, 2, 0, 4, 6, 10)
-	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
-	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
-
-	LOAD_MSG(X8, X9, X10, X11, SI, 6, 14, 11, 0, 15, 9, 3, 8)
-	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
-	SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
-	LOAD_MSG(X8, X9, X10, X11, SI, 12, 13, 1, 10, 2, 7, 4, 5)
-	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
-	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
-
-	LOAD_MSG(X8, X9, X10, X11, SI, 10, 8, 7, 1, 2, 4, 6, 5)
-	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
-	SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
-	LOAD_MSG(X8, X9, X10, X11, SI, 15, 9, 3, 13, 11, 14, 12, 0)
-	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, X8, X9, X10, X11, X11, X13, X14)
-	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
-
-	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 16(R10), 32(R10), 48(R10), 64(R10), X11, X13, X14)
-	SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
-	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 80(R10), 96(R10), 112(R10), 128(R10), X11, X13, X14)
-	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
+	MOVQ       R8, X8
+	PINSRQ     $0x01, R9, X8
+	MOVO       X12, X0
+	MOVO       X15, X1
+	MOVOU      32(AX), X2
+	MOVOU      48(AX), X3
+	MOVOU      ·iv0<>+0(SB), X4
+	MOVOU      ·iv1<>+0(SB), X5
+	MOVOU      ·iv2<>+0(SB), X6
+	PXOR       X8, X6
+	MOVO       (R10), X7
+	MOVQ       (SI), X8
+	PINSRQ     $0x01, 16(SI), X8
+	MOVQ       32(SI), X9
+	PINSRQ     $0x01, 48(SI), X9
+	MOVQ       8(SI), X10
+	PINSRQ     $0x01, 24(SI), X10
+	MOVQ       40(SI), X11
+	PINSRQ     $0x01, 56(SI), X11
+	MOVO       X8, 16(R10)
+	MOVO       X9, 32(R10)
+	MOVO       X10, 48(R10)
+	MOVO       X11, 64(R10)
+	PADDQ      X8, X0
+	PADDQ      X9, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X6, X6
+	PSHUFD     $0xb1, X7, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	PSHUFB     X13, X2
+	PSHUFB     X13, X3
+	PADDQ      X10, X0
+	PADDQ      X11, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFB     X14, X6
+	PSHUFB     X14, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	MOVOU      X2, X11
+	PADDQ      X2, X11
+	PSRLQ      $0x3f, X2
+	PXOR       X11, X2
+	MOVOU      X3, X11
+	PADDQ      X3, X11
+	PSRLQ      $0x3f, X3
+	PXOR       X11, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X6, X8
+	PUNPCKLQDQ X6, X9
+	PUNPCKHQDQ X7, X6
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X7, X9
+	MOVO       X8, X7
+	MOVO       X2, X8
+	PUNPCKHQDQ X9, X7
+	PUNPCKLQDQ X3, X9
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X3
+	MOVQ       64(SI), X8
+	PINSRQ     $0x01, 80(SI), X8
+	MOVQ       96(SI), X9
+	PINSRQ     $0x01, 112(SI), X9
+	MOVQ       72(SI), X10
+	PINSRQ     $0x01, 88(SI), X10
+	MOVQ       104(SI), X11
+	PINSRQ     $0x01, 120(SI), X11
+	MOVO       X8, 80(R10)
+	MOVO       X9, 96(R10)
+	MOVO       X10, 112(R10)
+	MOVO       X11, 128(R10)
+	PADDQ      X8, X0
+	PADDQ      X9, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X6, X6
+	PSHUFD     $0xb1, X7, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	PSHUFB     X13, X2
+	PSHUFB     X13, X3
+	PADDQ      X10, X0
+	PADDQ      X11, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFB     X14, X6
+	PSHUFB     X14, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	MOVOU      X2, X11
+	PADDQ      X2, X11
+	PSRLQ      $0x3f, X2
+	PXOR       X11, X2
+	MOVOU      X3, X11
+	PADDQ      X3, X11
+	PSRLQ      $0x3f, X3
+	PXOR       X11, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X2, X8
+	PUNPCKLQDQ X2, X9
+	PUNPCKHQDQ X3, X2
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X3, X9
+	MOVO       X8, X3
+	MOVO       X6, X8
+	PUNPCKHQDQ X9, X3
+	PUNPCKLQDQ X7, X9
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X7
+	MOVQ       112(SI), X8
+	PINSRQ     $0x01, 32(SI), X8
+	MOVQ       72(SI), X9
+	PINSRQ     $0x01, 104(SI), X9
+	MOVQ       80(SI), X10
+	PINSRQ     $0x01, 64(SI), X10
+	MOVQ       120(SI), X11
+	PINSRQ     $0x01, 48(SI), X11
+	MOVO       X8, 144(R10)
+	MOVO       X9, 160(R10)
+	MOVO       X10, 176(R10)
+	MOVO       X11, 192(R10)
+	PADDQ      X8, X0
+	PADDQ      X9, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X6, X6
+	PSHUFD     $0xb1, X7, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	PSHUFB     X13, X2
+	PSHUFB     X13, X3
+	PADDQ      X10, X0
+	PADDQ      X11, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFB     X14, X6
+	PSHUFB     X14, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	MOVOU      X2, X11
+	PADDQ      X2, X11
+	PSRLQ      $0x3f, X2
+	PXOR       X11, X2
+	MOVOU      X3, X11
+	PADDQ      X3, X11
+	PSRLQ      $0x3f, X3
+	PXOR       X11, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X6, X8
+	PUNPCKLQDQ X6, X9
+	PUNPCKHQDQ X7, X6
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X7, X9
+	MOVO       X8, X7
+	MOVO       X2, X8
+	PUNPCKHQDQ X9, X7
+	PUNPCKLQDQ X3, X9
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X3
+	MOVQ       8(SI), X8
+	PINSRQ     $0x01, (SI), X8
+	MOVQ       88(SI), X9
+	PINSRQ     $0x01, 40(SI), X9
+	MOVQ       96(SI), X10
+	PINSRQ     $0x01, 16(SI), X10
+	MOVQ       56(SI), X11
+	PINSRQ     $0x01, 24(SI), X11
+	MOVO       X8, 208(R10)
+	MOVO       X9, 224(R10)
+	MOVO       X10, 240(R10)
+	MOVO       X11, 256(R10)
+	PADDQ      X8, X0
+	PADDQ      X9, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X6, X6
+	PSHUFD     $0xb1, X7, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	PSHUFB     X13, X2
+	PSHUFB     X13, X3
+	PADDQ      X10, X0
+	PADDQ      X11, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFB     X14, X6
+	PSHUFB     X14, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	MOVOU      X2, X11
+	PADDQ      X2, X11
+	PSRLQ      $0x3f, X2
+	PXOR       X11, X2
+	MOVOU      X3, X11
+	PADDQ      X3, X11
+	PSRLQ      $0x3f, X3
+	PXOR       X11, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X2, X8
+	PUNPCKLQDQ X2, X9
+	PUNPCKHQDQ X3, X2
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X3, X9
+	MOVO       X8, X3
+	MOVO       X6, X8
+	PUNPCKHQDQ X9, X3
+	PUNPCKLQDQ X7, X9
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X7
+	MOVQ       88(SI), X8
+	PINSRQ     $0x01, 96(SI), X8
+	MOVQ       40(SI), X9
+	PINSRQ     $0x01, 120(SI), X9
+	MOVQ       64(SI), X10
+	PINSRQ     $0x01, (SI), X10
+	MOVQ       16(SI), X11
+	PINSRQ     $0x01, 104(SI), X11
+	PADDQ      X8, X0
+	PADDQ      X9, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X6, X6
+	PSHUFD     $0xb1, X7, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	PSHUFB     X13, X2
+	PSHUFB     X13, X3
+	PADDQ      X10, X0
+	PADDQ      X11, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFB     X14, X6
+	PSHUFB     X14, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	MOVOU      X2, X11
+	PADDQ      X2, X11
+	PSRLQ      $0x3f, X2
+	PXOR       X11, X2
+	MOVOU      X3, X11
+	PADDQ      X3, X11
+	PSRLQ      $0x3f, X3
+	PXOR       X11, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X6, X8
+	PUNPCKLQDQ X6, X9
+	PUNPCKHQDQ X7, X6
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X7, X9
+	MOVO       X8, X7
+	MOVO       X2, X8
+	PUNPCKHQDQ X9, X7
+	PUNPCKLQDQ X3, X9
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X3
+	MOVQ       80(SI), X8
+	PINSRQ     $0x01, 24(SI), X8
+	MOVQ       56(SI), X9
+	PINSRQ     $0x01, 72(SI), X9
+	MOVQ       112(SI), X10
+	PINSRQ     $0x01, 48(SI), X10
+	MOVQ       8(SI), X11
+	PINSRQ     $0x01, 32(SI), X11
+	PADDQ      X8, X0
+	PADDQ      X9, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X6, X6
+	PSHUFD     $0xb1, X7, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	PSHUFB     X13, X2
+	PSHUFB     X13, X3
+	PADDQ      X10, X0
+	PADDQ      X11, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFB     X14, X6
+	PSHUFB     X14, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	MOVOU      X2, X11
+	PADDQ      X2, X11
+	PSRLQ      $0x3f, X2
+	PXOR       X11, X2
+	MOVOU      X3, X11
+	PADDQ      X3, X11
+	PSRLQ      $0x3f, X3
+	PXOR       X11, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X2, X8
+	PUNPCKLQDQ X2, X9
+	PUNPCKHQDQ X3, X2
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X3, X9
+	MOVO       X8, X3
+	MOVO       X6, X8
+	PUNPCKHQDQ X9, X3
+	PUNPCKLQDQ X7, X9
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X7
+	MOVQ       56(SI), X8
+	PINSRQ     $0x01, 24(SI), X8
+	MOVQ       104(SI), X9
+	PINSRQ     $0x01, 88(SI), X9
+	MOVQ       72(SI), X10
+	PINSRQ     $0x01, 8(SI), X10
+	MOVQ       96(SI), X11
+	PINSRQ     $0x01, 112(SI), X11
+	PADDQ      X8, X0
+	PADDQ      X9, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X6, X6
+	PSHUFD     $0xb1, X7, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	PSHUFB     X13, X2
+	PSHUFB     X13, X3
+	PADDQ      X10, X0
+	PADDQ      X11, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFB     X14, X6
+	PSHUFB     X14, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	MOVOU      X2, X11
+	PADDQ      X2, X11
+	PSRLQ      $0x3f, X2
+	PXOR       X11, X2
+	MOVOU      X3, X11
+	PADDQ      X3, X11
+	PSRLQ      $0x3f, X3
+	PXOR       X11, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X6, X8
+	PUNPCKLQDQ X6, X9
+	PUNPCKHQDQ X7, X6
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X7, X9
+	MOVO       X8, X7
+	MOVO       X2, X8
+	PUNPCKHQDQ X9, X7
+	PUNPCKLQDQ X3, X9
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X3
+	MOVQ       16(SI), X8
+	PINSRQ     $0x01, 40(SI), X8
+	MOVQ       32(SI), X9
+	PINSRQ     $0x01, 120(SI), X9
+	MOVQ       48(SI), X10
+	PINSRQ     $0x01, 80(SI), X10
+	MOVQ       (SI), X11
+	PINSRQ     $0x01, 64(SI), X11
+	PADDQ      X8, X0
+	PADDQ      X9, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X6, X6
+	PSHUFD     $0xb1, X7, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	PSHUFB     X13, X2
+	PSHUFB     X13, X3
+	PADDQ      X10, X0
+	PADDQ      X11, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFB     X14, X6
+	PSHUFB     X14, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	MOVOU      X2, X11
+	PADDQ      X2, X11
+	PSRLQ      $0x3f, X2
+	PXOR       X11, X2
+	MOVOU      X3, X11
+	PADDQ      X3, X11
+	PSRLQ      $0x3f, X3
+	PXOR       X11, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X2, X8
+	PUNPCKLQDQ X2, X9
+	PUNPCKHQDQ X3, X2
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X3, X9
+	MOVO       X8, X3
+	MOVO       X6, X8
+	PUNPCKHQDQ X9, X3
+	PUNPCKLQDQ X7, X9
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X7
+	MOVQ       72(SI), X8
+	PINSRQ     $0x01, 40(SI), X8
+	MOVQ       16(SI), X9
+	PINSRQ     $0x01, 80(SI), X9
+	MOVQ       (SI), X10
+	PINSRQ     $0x01, 56(SI), X10
+	MOVQ       32(SI), X11
+	PINSRQ     $0x01, 120(SI), X11
+	PADDQ      X8, X0
+	PADDQ      X9, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X6, X6
+	PSHUFD     $0xb1, X7, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	PSHUFB     X13, X2
+	PSHUFB     X13, X3
+	PADDQ      X10, X0
+	PADDQ      X11, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFB     X14, X6
+	PSHUFB     X14, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	MOVOU      X2, X11
+	PADDQ      X2, X11
+	PSRLQ      $0x3f, X2
+	PXOR       X11, X2
+	MOVOU      X3, X11
+	PADDQ      X3, X11
+	PSRLQ      $0x3f, X3
+	PXOR       X11, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X6, X8
+	PUNPCKLQDQ X6, X9
+	PUNPCKHQDQ X7, X6
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X7, X9
+	MOVO       X8, X7
+	MOVO       X2, X8
+	PUNPCKHQDQ X9, X7
+	PUNPCKLQDQ X3, X9
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X3
+	MOVQ       112(SI), X8
+	PINSRQ     $0x01, 88(SI), X8
+	MOVQ       48(SI), X9
+	PINSRQ     $0x01, 24(SI), X9
+	MOVQ       8(SI), X10
+	PINSRQ     $0x01, 96(SI), X10
+	MOVQ       64(SI), X11
+	PINSRQ     $0x01, 104(SI), X11
+	PADDQ      X8, X0
+	PADDQ      X9, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X6, X6
+	PSHUFD     $0xb1, X7, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	PSHUFB     X13, X2
+	PSHUFB     X13, X3
+	PADDQ      X10, X0
+	PADDQ      X11, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFB     X14, X6
+	PSHUFB     X14, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	MOVOU      X2, X11
+	PADDQ      X2, X11
+	PSRLQ      $0x3f, X2
+	PXOR       X11, X2
+	MOVOU      X3, X11
+	PADDQ      X3, X11
+	PSRLQ      $0x3f, X3
+	PXOR       X11, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X2, X8
+	PUNPCKLQDQ X2, X9
+	PUNPCKHQDQ X3, X2
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X3, X9
+	MOVO       X8, X3
+	MOVO       X6, X8
+	PUNPCKHQDQ X9, X3
+	PUNPCKLQDQ X7, X9
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X7
+	MOVQ       16(SI), X8
+	PINSRQ     $0x01, 48(SI), X8
+	MOVQ       (SI), X9
+	PINSRQ     $0x01, 64(SI), X9
+	MOVQ       96(SI), X10
+	PINSRQ     $0x01, 80(SI), X10
+	MOVQ       88(SI), X11
+	PINSRQ     $0x01, 24(SI), X11
+	PADDQ      X8, X0
+	PADDQ      X9, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X6, X6
+	PSHUFD     $0xb1, X7, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	PSHUFB     X13, X2
+	PSHUFB     X13, X3
+	PADDQ      X10, X0
+	PADDQ      X11, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFB     X14, X6
+	PSHUFB     X14, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	MOVOU      X2, X11
+	PADDQ      X2, X11
+	PSRLQ      $0x3f, X2
+	PXOR       X11, X2
+	MOVOU      X3, X11
+	PADDQ      X3, X11
+	PSRLQ      $0x3f, X3
+	PXOR       X11, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X6, X8
+	PUNPCKLQDQ X6, X9
+	PUNPCKHQDQ X7, X6
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X7, X9
+	MOVO       X8, X7
+	MOVO       X2, X8
+	PUNPCKHQDQ X9, X7
+	PUNPCKLQDQ X3, X9
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X3
+	MOVQ       32(SI), X8
+	PINSRQ     $0x01, 56(SI), X8
+	MOVQ       120(SI), X9
+	PINSRQ     $0x01, 8(SI), X9
+	MOVQ       104(SI), X10
+	PINSRQ     $0x01, 40(SI), X10
+	MOVQ       112(SI), X11
+	PINSRQ     $0x01, 72(SI), X11
+	PADDQ      X8, X0
+	PADDQ      X9, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X6, X6
+	PSHUFD     $0xb1, X7, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	PSHUFB     X13, X2
+	PSHUFB     X13, X3
+	PADDQ      X10, X0
+	PADDQ      X11, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFB     X14, X6
+	PSHUFB     X14, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	MOVOU      X2, X11
+	PADDQ      X2, X11
+	PSRLQ      $0x3f, X2
+	PXOR       X11, X2
+	MOVOU      X3, X11
+	PADDQ      X3, X11
+	PSRLQ      $0x3f, X3
+	PXOR       X11, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X2, X8
+	PUNPCKLQDQ X2, X9
+	PUNPCKHQDQ X3, X2
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X3, X9
+	MOVO       X8, X3
+	MOVO       X6, X8
+	PUNPCKHQDQ X9, X3
+	PUNPCKLQDQ X7, X9
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X7
+	MOVQ       96(SI), X8
+	PINSRQ     $0x01, 8(SI), X8
+	MOVQ       112(SI), X9
+	PINSRQ     $0x01, 32(SI), X9
+	MOVQ       40(SI), X10
+	PINSRQ     $0x01, 120(SI), X10
+	MOVQ       104(SI), X11
+	PINSRQ     $0x01, 80(SI), X11
+	PADDQ      X8, X0
+	PADDQ      X9, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X6, X6
+	PSHUFD     $0xb1, X7, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	PSHUFB     X13, X2
+	PSHUFB     X13, X3
+	PADDQ      X10, X0
+	PADDQ      X11, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFB     X14, X6
+	PSHUFB     X14, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	MOVOU      X2, X11
+	PADDQ      X2, X11
+	PSRLQ      $0x3f, X2
+	PXOR       X11, X2
+	MOVOU      X3, X11
+	PADDQ      X3, X11
+	PSRLQ      $0x3f, X3
+	PXOR       X11, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X6, X8
+	PUNPCKLQDQ X6, X9
+	PUNPCKHQDQ X7, X6
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X7, X9
+	MOVO       X8, X7
+	MOVO       X2, X8
+	PUNPCKHQDQ X9, X7
+	PUNPCKLQDQ X3, X9
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X3
+	MOVQ       (SI), X8
+	PINSRQ     $0x01, 48(SI), X8
+	MOVQ       72(SI), X9
+	PINSRQ     $0x01, 64(SI), X9
+	MOVQ       56(SI), X10
+	PINSRQ     $0x01, 24(SI), X10
+	MOVQ       16(SI), X11
+	PINSRQ     $0x01, 88(SI), X11
+	PADDQ      X8, X0
+	PADDQ      X9, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X6, X6
+	PSHUFD     $0xb1, X7, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	PSHUFB     X13, X2
+	PSHUFB     X13, X3
+	PADDQ      X10, X0
+	PADDQ      X11, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFB     X14, X6
+	PSHUFB     X14, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	MOVOU      X2, X11
+	PADDQ      X2, X11
+	PSRLQ      $0x3f, X2
+	PXOR       X11, X2
+	MOVOU      X3, X11
+	PADDQ      X3, X11
+	PSRLQ      $0x3f, X3
+	PXOR       X11, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X2, X8
+	PUNPCKLQDQ X2, X9
+	PUNPCKHQDQ X3, X2
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X3, X9
+	MOVO       X8, X3
+	MOVO       X6, X8
+	PUNPCKHQDQ X9, X3
+	PUNPCKLQDQ X7, X9
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X7
+	MOVQ       104(SI), X8
+	PINSRQ     $0x01, 56(SI), X8
+	MOVQ       96(SI), X9
+	PINSRQ     $0x01, 24(SI), X9
+	MOVQ       88(SI), X10
+	PINSRQ     $0x01, 112(SI), X10
+	MOVQ       8(SI), X11
+	PINSRQ     $0x01, 72(SI), X11
+	PADDQ      X8, X0
+	PADDQ      X9, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X6, X6
+	PSHUFD     $0xb1, X7, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	PSHUFB     X13, X2
+	PSHUFB     X13, X3
+	PADDQ      X10, X0
+	PADDQ      X11, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFB     X14, X6
+	PSHUFB     X14, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	MOVOU      X2, X11
+	PADDQ      X2, X11
+	PSRLQ      $0x3f, X2
+	PXOR       X11, X2
+	MOVOU      X3, X11
+	PADDQ      X3, X11
+	PSRLQ      $0x3f, X3
+	PXOR       X11, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X6, X8
+	PUNPCKLQDQ X6, X9
+	PUNPCKHQDQ X7, X6
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X7, X9
+	MOVO       X8, X7
+	MOVO       X2, X8
+	PUNPCKHQDQ X9, X7
+	PUNPCKLQDQ X3, X9
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X3
+	MOVQ       40(SI), X8
+	PINSRQ     $0x01, 120(SI), X8
+	MOVQ       64(SI), X9
+	PINSRQ     $0x01, 16(SI), X9
+	MOVQ       (SI), X10
+	PINSRQ     $0x01, 32(SI), X10
+	MOVQ       48(SI), X11
+	PINSRQ     $0x01, 80(SI), X11
+	PADDQ      X8, X0
+	PADDQ      X9, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X6, X6
+	PSHUFD     $0xb1, X7, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	PSHUFB     X13, X2
+	PSHUFB     X13, X3
+	PADDQ      X10, X0
+	PADDQ      X11, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFB     X14, X6
+	PSHUFB     X14, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	MOVOU      X2, X11
+	PADDQ      X2, X11
+	PSRLQ      $0x3f, X2
+	PXOR       X11, X2
+	MOVOU      X3, X11
+	PADDQ      X3, X11
+	PSRLQ      $0x3f, X3
+	PXOR       X11, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X2, X8
+	PUNPCKLQDQ X2, X9
+	PUNPCKHQDQ X3, X2
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X3, X9
+	MOVO       X8, X3
+	MOVO       X6, X8
+	PUNPCKHQDQ X9, X3
+	PUNPCKLQDQ X7, X9
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X7
+	MOVQ       48(SI), X8
+	PINSRQ     $0x01, 112(SI), X8
+	MOVQ       88(SI), X9
+	PINSRQ     $0x01, (SI), X9
+	MOVQ       120(SI), X10
+	PINSRQ     $0x01, 72(SI), X10
+	MOVQ       24(SI), X11
+	PINSRQ     $0x01, 64(SI), X11
+	PADDQ      X8, X0
+	PADDQ      X9, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X6, X6
+	PSHUFD     $0xb1, X7, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	PSHUFB     X13, X2
+	PSHUFB     X13, X3
+	PADDQ      X10, X0
+	PADDQ      X11, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFB     X14, X6
+	PSHUFB     X14, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	MOVOU      X2, X11
+	PADDQ      X2, X11
+	PSRLQ      $0x3f, X2
+	PXOR       X11, X2
+	MOVOU      X3, X11
+	PADDQ      X3, X11
+	PSRLQ      $0x3f, X3
+	PXOR       X11, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X6, X8
+	PUNPCKLQDQ X6, X9
+	PUNPCKHQDQ X7, X6
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X7, X9
+	MOVO       X8, X7
+	MOVO       X2, X8
+	PUNPCKHQDQ X9, X7
+	PUNPCKLQDQ X3, X9
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X3
+	MOVQ       96(SI), X8
+	PINSRQ     $0x01, 104(SI), X8
+	MOVQ       8(SI), X9
+	PINSRQ     $0x01, 80(SI), X9
+	MOVQ       16(SI), X10
+	PINSRQ     $0x01, 56(SI), X10
+	MOVQ       32(SI), X11
+	PINSRQ     $0x01, 40(SI), X11
+	PADDQ      X8, X0
+	PADDQ      X9, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X6, X6
+	PSHUFD     $0xb1, X7, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	PSHUFB     X13, X2
+	PSHUFB     X13, X3
+	PADDQ      X10, X0
+	PADDQ      X11, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFB     X14, X6
+	PSHUFB     X14, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	MOVOU      X2, X11
+	PADDQ      X2, X11
+	PSRLQ      $0x3f, X2
+	PXOR       X11, X2
+	MOVOU      X3, X11
+	PADDQ      X3, X11
+	PSRLQ      $0x3f, X3
+	PXOR       X11, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X2, X8
+	PUNPCKLQDQ X2, X9
+	PUNPCKHQDQ X3, X2
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X3, X9
+	MOVO       X8, X3
+	MOVO       X6, X8
+	PUNPCKHQDQ X9, X3
+	PUNPCKLQDQ X7, X9
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X7
+	MOVQ       80(SI), X8
+	PINSRQ     $0x01, 64(SI), X8
+	MOVQ       56(SI), X9
+	PINSRQ     $0x01, 8(SI), X9
+	MOVQ       16(SI), X10
+	PINSRQ     $0x01, 32(SI), X10
+	MOVQ       48(SI), X11
+	PINSRQ     $0x01, 40(SI), X11
+	PADDQ      X8, X0
+	PADDQ      X9, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X6, X6
+	PSHUFD     $0xb1, X7, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	PSHUFB     X13, X2
+	PSHUFB     X13, X3
+	PADDQ      X10, X0
+	PADDQ      X11, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFB     X14, X6
+	PSHUFB     X14, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	MOVOU      X2, X11
+	PADDQ      X2, X11
+	PSRLQ      $0x3f, X2
+	PXOR       X11, X2
+	MOVOU      X3, X11
+	PADDQ      X3, X11
+	PSRLQ      $0x3f, X3
+	PXOR       X11, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X6, X8
+	PUNPCKLQDQ X6, X9
+	PUNPCKHQDQ X7, X6
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X7, X9
+	MOVO       X8, X7
+	MOVO       X2, X8
+	PUNPCKHQDQ X9, X7
+	PUNPCKLQDQ X3, X9
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X3
+	MOVQ       120(SI), X8
+	PINSRQ     $0x01, 72(SI), X8
+	MOVQ       24(SI), X9
+	PINSRQ     $0x01, 104(SI), X9
+	MOVQ       88(SI), X10
+	PINSRQ     $0x01, 112(SI), X10
+	MOVQ       96(SI), X11
+	PINSRQ     $0x01, (SI), X11
+	PADDQ      X8, X0
+	PADDQ      X9, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X6, X6
+	PSHUFD     $0xb1, X7, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	PSHUFB     X13, X2
+	PSHUFB     X13, X3
+	PADDQ      X10, X0
+	PADDQ      X11, X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFB     X14, X6
+	PSHUFB     X14, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	MOVOU      X2, X11
+	PADDQ      X2, X11
+	PSRLQ      $0x3f, X2
+	PXOR       X11, X2
+	MOVOU      X3, X11
+	PADDQ      X3, X11
+	PSRLQ      $0x3f, X3
+	PXOR       X11, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X2, X8
+	PUNPCKLQDQ X2, X9
+	PUNPCKHQDQ X3, X2
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X3, X9
+	MOVO       X8, X3
+	MOVO       X6, X8
+	PUNPCKHQDQ X9, X3
+	PUNPCKLQDQ X7, X9
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X7
+	PADDQ      16(R10), X0
+	PADDQ      32(R10), X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X6, X6
+	PSHUFD     $0xb1, X7, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	PSHUFB     X13, X2
+	PSHUFB     X13, X3
+	PADDQ      48(R10), X0
+	PADDQ      64(R10), X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFB     X14, X6
+	PSHUFB     X14, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	MOVOU      X2, X11
+	PADDQ      X2, X11
+	PSRLQ      $0x3f, X2
+	PXOR       X11, X2
+	MOVOU      X3, X11
+	PADDQ      X3, X11
+	PSRLQ      $0x3f, X3
+	PXOR       X11, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X6, X8
+	PUNPCKLQDQ X6, X9
+	PUNPCKHQDQ X7, X6
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X7, X9
+	MOVO       X8, X7
+	MOVO       X2, X8
+	PUNPCKHQDQ X9, X7
+	PUNPCKLQDQ X3, X9
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X3
+	PADDQ      80(R10), X0
+	PADDQ      96(R10), X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X6, X6
+	PSHUFD     $0xb1, X7, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	PSHUFB     X13, X2
+	PSHUFB     X13, X3
+	PADDQ      112(R10), X0
+	PADDQ      128(R10), X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFB     X14, X6
+	PSHUFB     X14, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	MOVOU      X2, X11
+	PADDQ      X2, X11
+	PSRLQ      $0x3f, X2
+	PXOR       X11, X2
+	MOVOU      X3, X11
+	PADDQ      X3, X11
+	PSRLQ      $0x3f, X3
+	PXOR       X11, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X2, X8
+	PUNPCKLQDQ X2, X9
+	PUNPCKHQDQ X3, X2
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X3, X9
+	MOVO       X8, X3
+	MOVO       X6, X8
+	PUNPCKHQDQ X9, X3
+	PUNPCKLQDQ X7, X9
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X7
+	PADDQ      144(R10), X0
+	PADDQ      160(R10), X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X6, X6
+	PSHUFD     $0xb1, X7, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	PSHUFB     X13, X2
+	PSHUFB     X13, X3
+	PADDQ      176(R10), X0
+	PADDQ      192(R10), X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFB     X14, X6
+	PSHUFB     X14, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	MOVOU      X2, X11
+	PADDQ      X2, X11
+	PSRLQ      $0x3f, X2
+	PXOR       X11, X2
+	MOVOU      X3, X11
+	PADDQ      X3, X11
+	PSRLQ      $0x3f, X3
+	PXOR       X11, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X6, X8
+	PUNPCKLQDQ X6, X9
+	PUNPCKHQDQ X7, X6
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X7, X9
+	MOVO       X8, X7
+	MOVO       X2, X8
+	PUNPCKHQDQ X9, X7
+	PUNPCKLQDQ X3, X9
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X3
+	PADDQ      208(R10), X0
+	PADDQ      224(R10), X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFD     $0xb1, X6, X6
+	PSHUFD     $0xb1, X7, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	PSHUFB     X13, X2
+	PSHUFB     X13, X3
+	PADDQ      240(R10), X0
+	PADDQ      256(R10), X1
+	PADDQ      X2, X0
+	PADDQ      X3, X1
+	PXOR       X0, X6
+	PXOR       X1, X7
+	PSHUFB     X14, X6
+	PSHUFB     X14, X7
+	PADDQ      X6, X4
+	PADDQ      X7, X5
+	PXOR       X4, X2
+	PXOR       X5, X3
+	MOVOU      X2, X11
+	PADDQ      X2, X11
+	PSRLQ      $0x3f, X2
+	PXOR       X11, X2
+	MOVOU      X3, X11
+	PADDQ      X3, X11
+	PSRLQ      $0x3f, X3
+	PXOR       X11, X3
+	MOVO       X4, X8
+	MOVO       X5, X4
+	MOVO       X8, X5
+	MOVO       X2, X8
+	PUNPCKLQDQ X2, X9
+	PUNPCKHQDQ X3, X2
+	PUNPCKHQDQ X9, X2
+	PUNPCKLQDQ X3, X9
+	MOVO       X8, X3
+	MOVO       X6, X8
+	PUNPCKHQDQ X9, X3
+	PUNPCKLQDQ X7, X9
+	PUNPCKHQDQ X9, X6
+	PUNPCKLQDQ X8, X9
+	PUNPCKHQDQ X9, X7
+	MOVOU      32(AX), X10
+	MOVOU      48(AX), X11
+	PXOR       X0, X12
+	PXOR       X1, X15
+	PXOR       X2, X10
+	PXOR       X3, X11
+	PXOR       X4, X12
+	PXOR       X5, X15
+	PXOR       X6, X10
+	PXOR       X7, X11
+	MOVOU      X10, 32(AX)
+	MOVOU      X11, 48(AX)
+	LEAQ       128(SI), SI
+	SUBQ       $0x80, DI
+	JNE        loop
+	MOVOU      X12, (AX)
+	MOVOU      X15, 16(AX)
+	MOVQ       R8, (BX)
+	MOVQ       R9, 8(BX)
+	RET
 
-	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 144(R10), 160(R10), 176(R10), 192(R10), X11, X13, X14)
-	SHUFFLE(X2, X3, X4, X5, X6, X7, X8, X9)
-	HALF_ROUND(X0, X1, X2, X3, X4, X5, X6, X7, 208(R10), 224(R10), 240(R10), 256(R10), X11, X13, X14)
-	SHUFFLE_INV(X2, X3, X4, X5, X6, X7, X8, X9)
+DATA ·iv3<>+0(SB)/8, $0x1f83d9abfb41bd6b
+DATA ·iv3<>+8(SB)/8, $0x5be0cd19137e2179
+GLOBL ·iv3<>(SB), RODATA|NOPTR, $16
 
-	MOVOU 32(AX), X10
-	MOVOU 48(AX), X11
-	PXOR  X0, X12
-	PXOR  X1, X15
-	PXOR  X2, X10
-	PXOR  X3, X11
-	PXOR  X4, X12
-	PXOR  X5, X15
-	PXOR  X6, X10
-	PXOR  X7, X11
-	MOVOU X10, 32(AX)
-	MOVOU X11, 48(AX)
+DATA ·c40<>+0(SB)/8, $0x0201000706050403
+DATA ·c40<>+8(SB)/8, $0x0a09080f0e0d0c0b
+GLOBL ·c40<>(SB), RODATA|NOPTR, $16
 
-	LEAQ 128(SI), SI
-	SUBQ $128, DI
-	JNE  loop
+DATA ·c48<>+0(SB)/8, $0x0100070605040302
+DATA ·c48<>+8(SB)/8, $0x09080f0e0d0c0b0a
+GLOBL ·c48<>(SB), RODATA|NOPTR, $16
 
-	MOVOU X12, 0(AX)
-	MOVOU X15, 16(AX)
+DATA ·iv0<>+0(SB)/8, $0x6a09e667f3bcc908
+DATA ·iv0<>+8(SB)/8, $0xbb67ae8584caa73b
+GLOBL ·iv0<>(SB), RODATA|NOPTR, $16
 
-	MOVQ R8, 0(BX)
-	MOVQ R9, 8(BX)
+DATA ·iv1<>+0(SB)/8, $0x3c6ef372fe94f82b
+DATA ·iv1<>+8(SB)/8, $0xa54ff53a5f1d36f1
+GLOBL ·iv1<>(SB), RODATA|NOPTR, $16
 
-	RET
+DATA ·iv2<>+0(SB)/8, $0x510e527fade682d1
+DATA ·iv2<>+8(SB)/8, $0x9b05688c2b3e6c1f
+GLOBL ·iv2<>(SB), RODATA|NOPTR, $16
diff --git a/vendor/golang.org/x/net/http2/config.go b/vendor/golang.org/x/net/http2/config.go
new file mode 100644
index 00000000..de58dfb8
--- /dev/null
+++ b/vendor/golang.org/x/net/http2/config.go
@@ -0,0 +1,122 @@
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package http2
+
+import (
+	"math"
+	"net/http"
+	"time"
+)
+
+// http2Config is a package-internal version of net/http.HTTP2Config.
+//
+// http.HTTP2Config was added in Go 1.24.
+// When running with a version of net/http that includes HTTP2Config,
+// we merge the configuration with the fields in Transport or Server
+// to produce an http2Config.
+//
+// Zero valued fields in http2Config are interpreted as in the
+// net/http.HTTP2Config documentation.
+//
+// Precedence order for reconciling configurations is:
+//
+//   - Use the net/http.{Server,Transport}.HTTP2 value, when non-zero.
+//   - Otherwise use the http2.{Server,Transport} value.
+//   - If the resulting value is zero or out of range, use a default.
+type http2Config struct {
+	MaxConcurrentStreams         uint32
+	MaxDecoderHeaderTableSize    uint32
+	MaxEncoderHeaderTableSize    uint32
+	MaxReadFrameSize             uint32
+	MaxUploadBufferPerConnection int32
+	MaxUploadBufferPerStream     int32
+	SendPingTimeout              time.Duration
+	PingTimeout                  time.Duration
+	WriteByteTimeout             time.Duration
+	PermitProhibitedCipherSuites bool
+	CountError                   func(errType string)
+}
+
+// configFromServer merges configuration settings from
+// net/http.Server.HTTP2 (h1) and http2.Server (h2), then applies server defaults.
+func configFromServer(h1 *http.Server, h2 *Server) http2Config {
+	conf := http2Config{
+		MaxConcurrentStreams:         h2.MaxConcurrentStreams,
+		MaxEncoderHeaderTableSize:    h2.MaxEncoderHeaderTableSize,
+		MaxDecoderHeaderTableSize:    h2.MaxDecoderHeaderTableSize,
+		MaxReadFrameSize:             h2.MaxReadFrameSize,
+		MaxUploadBufferPerConnection: h2.MaxUploadBufferPerConnection,
+		MaxUploadBufferPerStream:     h2.MaxUploadBufferPerStream,
+		SendPingTimeout:              h2.ReadIdleTimeout,
+		PingTimeout:                  h2.PingTimeout,
+		WriteByteTimeout:             h2.WriteByteTimeout,
+		PermitProhibitedCipherSuites: h2.PermitProhibitedCipherSuites,
+		CountError:                   h2.CountError,
+	}
+	fillNetHTTPServerConfig(&conf, h1) // non-zero h1.HTTP2 settings win (Go 1.24+ only)
+	setConfigDefaults(&conf, true)     // true: use the server-side defaults
+	return conf
+}
+
+// configFromTransport merges configuration settings from h2 and h2.t1.HTTP2
+// (the HTTP/2 settings of the net/http Transport), then applies transport defaults.
+func configFromTransport(h2 *Transport) http2Config {
+	conf := http2Config{
+		MaxEncoderHeaderTableSize: h2.MaxEncoderHeaderTableSize,
+		MaxDecoderHeaderTableSize: h2.MaxDecoderHeaderTableSize,
+		MaxReadFrameSize:          h2.MaxReadFrameSize,
+		SendPingTimeout:           h2.ReadIdleTimeout,
+		PingTimeout:               h2.PingTimeout,
+		WriteByteTimeout:          h2.WriteByteTimeout,
+	}
+
+	// Unlike most config fields, where out-of-range values revert to the default,
+	// Transport.MaxReadFrameSize clips.
+	if conf.MaxReadFrameSize < minMaxFrameSize {
+		conf.MaxReadFrameSize = minMaxFrameSize
+	} else if conf.MaxReadFrameSize > maxFrameSize {
+		conf.MaxReadFrameSize = maxFrameSize
+	}
+
+	if h2.t1 != nil {
+		fillNetHTTPTransportConfig(&conf, h2.t1)
+	}
+	setConfigDefaults(&conf, false) // false: use the transport-side defaults
+	return conf
+}
+
+func setDefault[T ~int | ~int32 | ~uint32 | ~int64](v *T, minval, maxval, defval T) {
+	if *v < minval || *v > maxval { // outside [minval, maxval]: fall back to defval
+		*v = defval
+	}
+}
+
+func setConfigDefaults(conf *http2Config, server bool) {
+	setDefault(&conf.MaxConcurrentStreams, 1, math.MaxUint32, defaultMaxStreams)
+	setDefault(&conf.MaxEncoderHeaderTableSize, 1, math.MaxUint32, initialHeaderTableSize)
+	setDefault(&conf.MaxDecoderHeaderTableSize, 1, math.MaxUint32, initialHeaderTableSize)
+	if server { // per-connection buffer: the default depends on the role
+		setDefault(&conf.MaxUploadBufferPerConnection, initialWindowSize, math.MaxInt32, 1<<20)
+	} else {
+		setDefault(&conf.MaxUploadBufferPerConnection, initialWindowSize, math.MaxInt32, transportDefaultConnFlow)
+	}
+	if server { // per-stream buffer: the default depends on the role
+		setDefault(&conf.MaxUploadBufferPerStream, 1, math.MaxInt32, 1<<20)
+	} else {
+		setDefault(&conf.MaxUploadBufferPerStream, 1, math.MaxInt32, transportDefaultStreamFlow)
+	}
+	setDefault(&conf.MaxReadFrameSize, minMaxFrameSize, maxFrameSize, defaultMaxReadFrameSize)
+	setDefault(&conf.PingTimeout, 1, math.MaxInt64, 15*time.Second) // non-positive PingTimeout becomes 15s
+}
+
+// adjustHTTP1MaxHeaderSize converts a limit in bytes on the size of an HTTP/1 header
+// to an HTTP/2 MAX_HEADER_LIST_SIZE value.
+func adjustHTTP1MaxHeaderSize(n int64) int64 {
+	// http2's count is in a slightly different unit and includes 32 bytes per pair.
+	// So, take the net/http.Server value and pad it up a bit, assuming 10 headers.
+	const perFieldOverhead = 32 // per http2 spec
+	const typicalHeaders = 10   // conservative
+	return n + typicalHeaders*perFieldOverhead // i.e. n + 320
+}
diff --git a/vendor/golang.org/x/net/http2/config_go124.go b/vendor/golang.org/x/net/http2/config_go124.go
new file mode 100644
index 00000000..e3784123
--- /dev/null
+++ b/vendor/golang.org/x/net/http2/config_go124.go
@@ -0,0 +1,61 @@
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build go1.24
+
+package http2
+
+import "net/http"
+
+// fillNetHTTPServerConfig overrides fields in conf with the non-zero fields of srv.HTTP2.
+func fillNetHTTPServerConfig(conf *http2Config, srv *http.Server) {
+	fillNetHTTPConfig(conf, srv.HTTP2)
+}
+
+// fillNetHTTPTransportConfig sets fields in conf from tr.HTTP2.
+func fillNetHTTPTransportConfig(conf *http2Config, tr *http.Transport) {
+	fillNetHTTPConfig(conf, tr.HTTP2)
+}
+
+func fillNetHTTPConfig(conf *http2Config, h2 *http.HTTP2Config) {
+	if h2 == nil { // no net/http HTTP/2 config supplied: leave conf as is
+		return
+	}
+	if h2.MaxConcurrentStreams != 0 {
+		conf.MaxConcurrentStreams = uint32(h2.MaxConcurrentStreams)
+	}
+	if h2.MaxEncoderHeaderTableSize != 0 {
+		conf.MaxEncoderHeaderTableSize = uint32(h2.MaxEncoderHeaderTableSize)
+	}
+	if h2.MaxDecoderHeaderTableSize != 0 {
+		conf.MaxDecoderHeaderTableSize = uint32(h2.MaxDecoderHeaderTableSize)
+	}
+	if h2.MaxConcurrentStreams != 0 { // NOTE(review): repeats the MaxConcurrentStreams check above; redundant but harmless
+		conf.MaxConcurrentStreams = uint32(h2.MaxConcurrentStreams)
+	}
+	if h2.MaxReadFrameSize != 0 {
+		conf.MaxReadFrameSize = uint32(h2.MaxReadFrameSize)
+	}
+	if h2.MaxReceiveBufferPerConnection != 0 {
+		conf.MaxUploadBufferPerConnection = int32(h2.MaxReceiveBufferPerConnection)
+	}
+	if h2.MaxReceiveBufferPerStream != 0 {
+		conf.MaxUploadBufferPerStream = int32(h2.MaxReceiveBufferPerStream)
+	}
+	if h2.SendPingTimeout != 0 {
+		conf.SendPingTimeout = h2.SendPingTimeout
+	}
+	if h2.PingTimeout != 0 {
+		conf.PingTimeout = h2.PingTimeout
+	}
+	if h2.WriteByteTimeout != 0 {
+		conf.WriteByteTimeout = h2.WriteByteTimeout
+	}
+	if h2.PermitProhibitedCipherSuites { // can only turn the permission on, never off
+		conf.PermitProhibitedCipherSuites = true
+	}
+	if h2.CountError != nil {
+		conf.CountError = h2.CountError
+	}
+}
diff --git a/vendor/golang.org/x/net/http2/config_pre_go124.go b/vendor/golang.org/x/net/http2/config_pre_go124.go
new file mode 100644
index 00000000..060fd6c6
--- /dev/null
+++ b/vendor/golang.org/x/net/http2/config_pre_go124.go
@@ -0,0 +1,16 @@
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build !go1.24
+
+package http2
+
+import "net/http"
+
+// Pre-Go 1.24 fallback: both functions below are no-ops that leave conf untouched.
+// The Server.HTTP2 and Transport.HTTP2 config fields were added in Go 1.24.
+
+func fillNetHTTPServerConfig(conf *http2Config, srv *http.Server) {}
+
+func fillNetHTTPTransportConfig(conf *http2Config, tr *http.Transport) {}
diff --git a/vendor/golang.org/x/net/http2/http2.go b/vendor/golang.org/x/net/http2/http2.go
index 003e649f..7688c356 100644
--- a/vendor/golang.org/x/net/http2/http2.go
+++ b/vendor/golang.org/x/net/http2/http2.go
@@ -19,8 +19,9 @@ import (
 	"bufio"
 	"context"
 	"crypto/tls"
+	"errors"
 	"fmt"
-	"io"
+	"net"
 	"net/http"
 	"os"
 	"sort"
@@ -237,13 +238,19 @@ func (cw closeWaiter) Wait() {
 // Its buffered writer is lazily allocated as needed, to minimize
 // idle memory usage with many connections.
 type bufferedWriter struct {
-	_  incomparable
-	w  io.Writer     // immutable
-	bw *bufio.Writer // non-nil when data is buffered
+	_           incomparable
+	group       synctestGroupInterface // immutable
+	conn        net.Conn               // immutable
+	bw          *bufio.Writer          // non-nil when data is buffered
+	byteTimeout time.Duration          // immutable, WriteByteTimeout
 }
 
-func newBufferedWriter(w io.Writer) *bufferedWriter {
-	return &bufferedWriter{w: w}
+func newBufferedWriter(group synctestGroupInterface, conn net.Conn, timeout time.Duration) *bufferedWriter {
+	return &bufferedWriter{
+		group:       group,
+		conn:        conn,
+		byteTimeout: timeout,
+	}
 }
 
 // bufWriterPoolBufferSize is the size of bufio.Writer's
@@ -270,7 +277,7 @@ func (w *bufferedWriter) Available() int {
 func (w *bufferedWriter) Write(p []byte) (n int, err error) {
 	if w.bw == nil {
 		bw := bufWriterPool.Get().(*bufio.Writer)
-		bw.Reset(w.w)
+		bw.Reset((*bufferedWriterTimeoutWriter)(w))
 		w.bw = bw
 	}
 	return w.bw.Write(p)
@@ -288,6 +295,38 @@ func (w *bufferedWriter) Flush() error {
 	return err
 }
 
+type bufferedWriterTimeoutWriter bufferedWriter // bufferedWriter under another name; its Write is the bufio.Writer's sink
+
+func (w *bufferedWriterTimeoutWriter) Write(p []byte) (n int, err error) {
+	return writeWithByteTimeout(w.group, w.conn, w.byteTimeout, p) // goes straight to conn, honoring byteTimeout
+}
+
+// writeWithByteTimeout writes p to conn; a non-positive timeout means no write deadline.
+// If more than timeout passes without any bytes being written to the connection,
+// the write fails.
+func writeWithByteTimeout(group synctestGroupInterface, conn net.Conn, timeout time.Duration, p []byte) (n int, err error) {
+	if timeout <= 0 {
+		return conn.Write(p)
+	}
+	for {
+		var now time.Time
+		if group == nil {
+			now = time.Now()
+		} else {
+			now = group.Now()
+		}
+		conn.SetWriteDeadline(now.Add(timeout))
+		nn, err := conn.Write(p[n:])
+		n += nn
+		if n == len(p) || nn == 0 || !errors.Is(err, os.ErrDeadlineExceeded) {
+			// Either we finished the write, made no progress, or stopped for a reason
+			// other than an expired deadline. Whichever it is, we're done now.
+			conn.SetWriteDeadline(time.Time{})
+			return n, err
+		}
+	}
+}
+
 func mustUint31(v int32) uint32 {
 	if v < 0 || v > 2147483647 {
 		panic("out of range")
diff --git a/vendor/golang.org/x/net/http2/server.go b/vendor/golang.org/x/net/http2/server.go
index 6c349f3e..617b4a47 100644
--- a/vendor/golang.org/x/net/http2/server.go
+++ b/vendor/golang.org/x/net/http2/server.go
@@ -29,6 +29,7 @@ import (
 	"bufio"
 	"bytes"
 	"context"
+	"crypto/rand"
 	"crypto/tls"
 	"errors"
 	"fmt"
@@ -52,10 +53,14 @@ import (
 )
 
 const (
-	prefaceTimeout         = 10 * time.Second
-	firstSettingsTimeout   = 2 * time.Second // should be in-flight with preface anyway
-	handlerChunkWriteSize  = 4 << 10
-	defaultMaxStreams      = 250 // TODO: make this 100 as the GFE seems to?
+	prefaceTimeout        = 10 * time.Second
+	firstSettingsTimeout  = 2 * time.Second // should be in-flight with preface anyway
+	handlerChunkWriteSize = 4 << 10
+	defaultMaxStreams     = 250 // TODO: make this 100 as the GFE seems to?
+
+	// maxQueuedControlFrames is the maximum number of control frames like
+	// SETTINGS, PING and RST_STREAM that will be queued for writing before
+	// the connection is closed to prevent memory exhaustion attacks.
 	maxQueuedControlFrames = 10000
 )
 
@@ -127,6 +132,22 @@ type Server struct {
 	// If zero or negative, there is no timeout.
 	IdleTimeout time.Duration
 
+	// ReadIdleTimeout is the timeout after which a health check using a ping
+	// frame will be carried out if no frame is received on the connection.
+	// If zero, no health check is performed.
+	ReadIdleTimeout time.Duration
+
+	// PingTimeout is the timeout after which the connection will be closed
+	// if a response to a ping is not received.
+	// If zero, a default of 15 seconds is used.
+	PingTimeout time.Duration
+
+	// WriteByteTimeout is the timeout after which a connection will be
+	// closed if no data can be written to it. The timeout begins when data is
+	// available to write, and is extended whenever any bytes are written.
+	// If zero or negative, there is no timeout.
+	WriteByteTimeout time.Duration
+
 	// MaxUploadBufferPerConnection is the size of the initial flow
 	// control window for each connections. The HTTP/2 spec does not
 	// allow this to be smaller than 65535 or larger than 2^32-1.
@@ -189,57 +210,6 @@ func (s *Server) afterFunc(d time.Duration, f func()) timer {
 	return timeTimer{time.AfterFunc(d, f)}
 }
 
-func (s *Server) initialConnRecvWindowSize() int32 {
-	if s.MaxUploadBufferPerConnection >= initialWindowSize {
-		return s.MaxUploadBufferPerConnection
-	}
-	return 1 << 20
-}
-
-func (s *Server) initialStreamRecvWindowSize() int32 {
-	if s.MaxUploadBufferPerStream > 0 {
-		return s.MaxUploadBufferPerStream
-	}
-	return 1 << 20
-}
-
-func (s *Server) maxReadFrameSize() uint32 {
-	if v := s.MaxReadFrameSize; v >= minMaxFrameSize && v <= maxFrameSize {
-		return v
-	}
-	return defaultMaxReadFrameSize
-}
-
-func (s *Server) maxConcurrentStreams() uint32 {
-	if v := s.MaxConcurrentStreams; v > 0 {
-		return v
-	}
-	return defaultMaxStreams
-}
-
-func (s *Server) maxDecoderHeaderTableSize() uint32 {
-	if v := s.MaxDecoderHeaderTableSize; v > 0 {
-		return v
-	}
-	return initialHeaderTableSize
-}
-
-func (s *Server) maxEncoderHeaderTableSize() uint32 {
-	if v := s.MaxEncoderHeaderTableSize; v > 0 {
-		return v
-	}
-	return initialHeaderTableSize
-}
-
-// maxQueuedControlFrames is the maximum number of control frames like
-// SETTINGS, PING and RST_STREAM that will be queued for writing before
-// the connection is closed to prevent memory exhaustion attacks.
-func (s *Server) maxQueuedControlFrames() int {
-	// TODO: if anybody asks, add a Server field, and remember to define the
-	// behavior of negative values.
-	return maxQueuedControlFrames
-}
-
 type serverInternalState struct {
 	mu          sync.Mutex
 	activeConns map[*serverConn]struct{}
@@ -440,13 +410,15 @@ func (s *Server) serveConn(c net.Conn, opts *ServeConnOpts, newf func(*serverCon
 	baseCtx, cancel := serverConnBaseContext(c, opts)
 	defer cancel()
 
+	http1srv := opts.baseConfig()
+	conf := configFromServer(http1srv, s)
 	sc := &serverConn{
 		srv:                         s,
-		hs:                          opts.baseConfig(),
+		hs:                          http1srv,
 		conn:                        c,
 		baseCtx:                     baseCtx,
 		remoteAddrStr:               c.RemoteAddr().String(),
-		bw:                          newBufferedWriter(c),
+		bw:                          newBufferedWriter(s.group, c, conf.WriteByteTimeout),
 		handler:                     opts.handler(),
 		streams:                     make(map[uint32]*stream),
 		readFrameCh:                 make(chan readFrameResult),
@@ -456,9 +428,12 @@ func (s *Server) serveConn(c net.Conn, opts *ServeConnOpts, newf func(*serverCon
 		bodyReadCh:                  make(chan bodyReadMsg),         // buffering doesn't matter either way
 		doneServing:                 make(chan struct{}),
 		clientMaxStreams:            math.MaxUint32, // Section 6.5.2: "Initially, there is no limit to this value"
-		advMaxStreams:               s.maxConcurrentStreams(),
+		advMaxStreams:               conf.MaxConcurrentStreams,
 		initialStreamSendWindowSize: initialWindowSize,
+		initialStreamRecvWindowSize: conf.MaxUploadBufferPerStream,
 		maxFrameSize:                initialMaxFrameSize,
+		pingTimeout:                 conf.PingTimeout,
+		countErrorFunc:              conf.CountError,
 		serveG:                      newGoroutineLock(),
 		pushEnabled:                 true,
 		sawClientPreface:            opts.SawClientPreface,
@@ -491,15 +466,15 @@ func (s *Server) serveConn(c net.Conn, opts *ServeConnOpts, newf func(*serverCon
 	sc.flow.add(initialWindowSize)
 	sc.inflow.init(initialWindowSize)
 	sc.hpackEncoder = hpack.NewEncoder(&sc.headerWriteBuf)
-	sc.hpackEncoder.SetMaxDynamicTableSizeLimit(s.maxEncoderHeaderTableSize())
+	sc.hpackEncoder.SetMaxDynamicTableSizeLimit(conf.MaxEncoderHeaderTableSize)
 
 	fr := NewFramer(sc.bw, c)
-	if s.CountError != nil {
-		fr.countError = s.CountError
+	if conf.CountError != nil {
+		fr.countError = conf.CountError
 	}
-	fr.ReadMetaHeaders = hpack.NewDecoder(s.maxDecoderHeaderTableSize(), nil)
+	fr.ReadMetaHeaders = hpack.NewDecoder(conf.MaxDecoderHeaderTableSize, nil)
 	fr.MaxHeaderListSize = sc.maxHeaderListSize()
-	fr.SetMaxReadFrameSize(s.maxReadFrameSize())
+	fr.SetMaxReadFrameSize(conf.MaxReadFrameSize)
 	sc.framer = fr
 
 	if tc, ok := c.(connectionStater); ok {
@@ -532,7 +507,7 @@ func (s *Server) serveConn(c net.Conn, opts *ServeConnOpts, newf func(*serverCon
 			// So for now, do nothing here again.
 		}
 
-		if !s.PermitProhibitedCipherSuites && isBadCipher(sc.tlsState.CipherSuite) {
+		if !conf.PermitProhibitedCipherSuites && isBadCipher(sc.tlsState.CipherSuite) {
 			// "Endpoints MAY choose to generate a connection error
 			// (Section 5.4.1) of type INADEQUATE_SECURITY if one of
 			// the prohibited cipher suites are negotiated."
@@ -569,7 +544,7 @@ func (s *Server) serveConn(c net.Conn, opts *ServeConnOpts, newf func(*serverCon
 		opts.UpgradeRequest = nil
 	}
 
-	sc.serve()
+	sc.serve(conf)
 }
 
 func serverConnBaseContext(c net.Conn, opts *ServeConnOpts) (ctx context.Context, cancel func()) {
@@ -609,6 +584,7 @@ type serverConn struct {
 	tlsState         *tls.ConnectionState   // shared by all handlers, like net/http
 	remoteAddrStr    string
 	writeSched       WriteScheduler
+	countErrorFunc   func(errType string)
 
 	// Everything following is owned by the serve loop; use serveG.check():
 	serveG                      goroutineLock // used to verify funcs are on serve()
@@ -628,6 +604,7 @@ type serverConn struct {
 	streams                     map[uint32]*stream
 	unstartedHandlers           []unstartedHandler
 	initialStreamSendWindowSize int32
+	initialStreamRecvWindowSize int32
 	maxFrameSize                int32
 	peerMaxHeaderListSize       uint32            // zero means unknown (default)
 	canonHeader                 map[string]string // http2-lower-case -> Go-Canonical-Case
@@ -638,9 +615,14 @@ type serverConn struct {
 	inGoAway                    bool              // we've started to or sent GOAWAY
 	inFrameScheduleLoop         bool              // whether we're in the scheduleFrameWrite loop
 	needToSendGoAway            bool              // we need to schedule a GOAWAY frame write
+	pingSent                    bool
+	sentPingData                [8]byte
 	goAwayCode                  ErrCode
 	shutdownTimer               timer // nil until used
 	idleTimer                   timer // nil if unused
+	readIdleTimeout             time.Duration
+	pingTimeout                 time.Duration
+	readIdleTimer               timer // nil if unused
 
 	// Owned by the writeFrameAsync goroutine:
 	headerWriteBuf bytes.Buffer
@@ -655,11 +637,7 @@ func (sc *serverConn) maxHeaderListSize() uint32 {
 	if n <= 0 {
 		n = http.DefaultMaxHeaderBytes
 	}
-	// http2's count is in a slightly different unit and includes 32 bytes per pair.
-	// So, take the net/http.Server value and pad it up a bit, assuming 10 headers.
-	const perFieldOverhead = 32 // per http2 spec
-	const typicalHeaders = 10   // conservative
-	return uint32(n + typicalHeaders*perFieldOverhead)
+	return uint32(adjustHTTP1MaxHeaderSize(int64(n)))
 }
 
 func (sc *serverConn) curOpenStreams() uint32 {
@@ -923,7 +901,7 @@ func (sc *serverConn) notePanic() {
 	}
 }
 
-func (sc *serverConn) serve() {
+func (sc *serverConn) serve(conf http2Config) {
 	sc.serveG.check()
 	defer sc.notePanic()
 	defer sc.conn.Close()
@@ -937,18 +915,18 @@ func (sc *serverConn) serve() {
 
 	sc.writeFrame(FrameWriteRequest{
 		write: writeSettings{
-			{SettingMaxFrameSize, sc.srv.maxReadFrameSize()},
+			{SettingMaxFrameSize, conf.MaxReadFrameSize},
 			{SettingMaxConcurrentStreams, sc.advMaxStreams},
 			{SettingMaxHeaderListSize, sc.maxHeaderListSize()},
-			{SettingHeaderTableSize, sc.srv.maxDecoderHeaderTableSize()},
-			{SettingInitialWindowSize, uint32(sc.srv.initialStreamRecvWindowSize())},
+			{SettingHeaderTableSize, conf.MaxDecoderHeaderTableSize},
+			{SettingInitialWindowSize, uint32(sc.initialStreamRecvWindowSize)},
 		},
 	})
 	sc.unackedSettings++
 
 	// Each connection starts with initialWindowSize inflow tokens.
 	// If a higher value is configured, we add more tokens.
-	if diff := sc.srv.initialConnRecvWindowSize() - initialWindowSize; diff > 0 {
+	if diff := conf.MaxUploadBufferPerConnection - initialWindowSize; diff > 0 {
 		sc.sendWindowUpdate(nil, int(diff))
 	}
 
@@ -968,11 +946,18 @@ func (sc *serverConn) serve() {
 		defer sc.idleTimer.Stop()
 	}
 
+	if conf.SendPingTimeout > 0 {
+		sc.readIdleTimeout = conf.SendPingTimeout
+		sc.readIdleTimer = sc.srv.afterFunc(conf.SendPingTimeout, sc.onReadIdleTimer)
+		defer sc.readIdleTimer.Stop()
+	}
+
 	go sc.readFrames() // closed by defer sc.conn.Close above
 
 	settingsTimer := sc.srv.afterFunc(firstSettingsTimeout, sc.onSettingsTimer)
 	defer settingsTimer.Stop()
 
+	lastFrameTime := sc.srv.now()
 	loopNum := 0
 	for {
 		loopNum++
@@ -986,6 +971,7 @@ func (sc *serverConn) serve() {
 		case res := <-sc.wroteFrameCh:
 			sc.wroteFrame(res)
 		case res := <-sc.readFrameCh:
+			lastFrameTime = sc.srv.now()
 			// Process any written frames before reading new frames from the client since a
 			// written frame could have triggered a new stream to be started.
 			if sc.writingFrameAsync {
@@ -1017,6 +1003,8 @@ func (sc *serverConn) serve() {
 				case idleTimerMsg:
 					sc.vlogf("connection is idle")
 					sc.goAway(ErrCodeNo)
+				case readIdleTimerMsg:
+					sc.handlePingTimer(lastFrameTime)
 				case shutdownTimerMsg:
 					sc.vlogf("GOAWAY close timer fired; closing conn from %v", sc.conn.RemoteAddr())
 					return
@@ -1039,7 +1027,7 @@ func (sc *serverConn) serve() {
 		// If the peer is causing us to generate a lot of control frames,
 		// but not reading them from us, assume they are trying to make us
 		// run out of memory.
-		if sc.queuedControlFrames > sc.srv.maxQueuedControlFrames() {
+		if sc.queuedControlFrames > maxQueuedControlFrames {
 			sc.vlogf("http2: too many control frames in send queue, closing connection")
 			return
 		}
@@ -1055,12 +1043,39 @@ func (sc *serverConn) serve() {
 	}
 }
 
+func (sc *serverConn) handlePingTimer(lastFrameReadTime time.Time) {
+	if sc.pingSent { // timer fired while our PING was still unanswered
+		sc.vlogf("timeout waiting for PING response")
+		sc.conn.Close()
+		return
+	}
+
+	pingAt := lastFrameReadTime.Add(sc.readIdleTimeout)
+	now := sc.srv.now()
+	if pingAt.After(now) {
+		// We received frames since arming the ping timer.
+		// Reset it for the next possible timeout.
+		sc.readIdleTimer.Reset(pingAt.Sub(now))
+		return
+	}
+
+	sc.pingSent = true
+	// Ignore crypto/rand.Read errors: It generally can't fail, and worst case if it does
+	// is we send a PING frame containing 0s.
+	_, _ = rand.Read(sc.sentPingData[:])
+	sc.writeFrame(FrameWriteRequest{
+		write: &writePing{data: sc.sentPingData},
+	})
+	sc.readIdleTimer.Reset(sc.pingTimeout) // next firing means the PING response timed out
+}
+
 type serverMessage int
 
 // Message values sent to serveMsgCh.
 var (
 	settingsTimerMsg    = new(serverMessage)
 	idleTimerMsg        = new(serverMessage)
+	readIdleTimerMsg    = new(serverMessage)
 	shutdownTimerMsg    = new(serverMessage)
 	gracefulShutdownMsg = new(serverMessage)
 	handlerDoneMsg      = new(serverMessage)
@@ -1068,6 +1083,7 @@ var (
 
 func (sc *serverConn) onSettingsTimer() { sc.sendServeMsg(settingsTimerMsg) }
 func (sc *serverConn) onIdleTimer()     { sc.sendServeMsg(idleTimerMsg) }
+func (sc *serverConn) onReadIdleTimer() { sc.sendServeMsg(readIdleTimerMsg) }
 func (sc *serverConn) onShutdownTimer() { sc.sendServeMsg(shutdownTimerMsg) }
 
 func (sc *serverConn) sendServeMsg(msg interface{}) {
@@ -1320,6 +1336,10 @@ func (sc *serverConn) wroteFrame(res frameWriteResult) {
 	sc.writingFrame = false
 	sc.writingFrameAsync = false
 
+	if res.err != nil {
+		sc.conn.Close()
+	}
+
 	wr := res.wr
 
 	if writeEndsStream(wr.write) {
@@ -1594,6 +1614,11 @@ func (sc *serverConn) processFrame(f Frame) error {
 func (sc *serverConn) processPing(f *PingFrame) error {
 	sc.serveG.check()
 	if f.IsAck() {
+		if sc.pingSent && sc.sentPingData == f.Data {
+			// This is a response to a PING we sent.
+			sc.pingSent = false
+			sc.readIdleTimer.Reset(sc.readIdleTimeout)
+		}
 		// 6.7 PING: " An endpoint MUST NOT respond to PING frames
 		// containing this flag."
 		return nil
@@ -2160,7 +2185,7 @@ func (sc *serverConn) newStream(id, pusherID uint32, state streamState) *stream
 	st.cw.Init()
 	st.flow.conn = &sc.flow // link to conn-level counter
 	st.flow.add(sc.initialStreamSendWindowSize)
-	st.inflow.init(sc.srv.initialStreamRecvWindowSize())
+	st.inflow.init(sc.initialStreamRecvWindowSize)
 	if sc.hs.WriteTimeout > 0 {
 		st.writeDeadline = sc.srv.afterFunc(sc.hs.WriteTimeout, st.onWriteTimeout)
 	}
@@ -3301,7 +3326,7 @@ func (sc *serverConn) countError(name string, err error) error {
 	if sc == nil || sc.srv == nil {
 		return err
 	}
-	f := sc.srv.CountError
+	f := sc.countErrorFunc
 	if f == nil {
 		return err
 	}
diff --git a/vendor/golang.org/x/net/http2/transport.go b/vendor/golang.org/x/net/http2/transport.go
index 61f511f9..0c5f64aa 100644
--- a/vendor/golang.org/x/net/http2/transport.go
+++ b/vendor/golang.org/x/net/http2/transport.go
@@ -25,7 +25,6 @@ import (
 	"net/http"
 	"net/http/httptrace"
 	"net/textproto"
-	"os"
 	"sort"
 	"strconv"
 	"strings"
@@ -227,40 +226,26 @@ func (t *Transport) contextWithTimeout(ctx context.Context, d time.Duration) (co
 }
 
 func (t *Transport) maxHeaderListSize() uint32 {
-	if t.MaxHeaderListSize == 0 {
+	n := int64(t.MaxHeaderListSize)
+	if t.t1 != nil && t.t1.MaxResponseHeaderBytes != 0 {
+		n = t.t1.MaxResponseHeaderBytes
+		if n > 0 {
+			n = adjustHTTP1MaxHeaderSize(n)
+		}
+	}
+	if n <= 0 {
 		return 10 << 20
 	}
-	if t.MaxHeaderListSize == 0xffffffff {
+	if n >= 0xffffffff {
 		return 0
 	}
-	return t.MaxHeaderListSize
-}
-
-func (t *Transport) maxFrameReadSize() uint32 {
-	if t.MaxReadFrameSize == 0 {
-		return 0 // use the default provided by the peer
-	}
-	if t.MaxReadFrameSize < minMaxFrameSize {
-		return minMaxFrameSize
-	}
-	if t.MaxReadFrameSize > maxFrameSize {
-		return maxFrameSize
-	}
-	return t.MaxReadFrameSize
+	return uint32(n)
 }
 
 func (t *Transport) disableCompression() bool {
 	return t.DisableCompression || (t.t1 != nil && t.t1.DisableCompression)
 }
 
-func (t *Transport) pingTimeout() time.Duration {
-	if t.PingTimeout == 0 {
-		return 15 * time.Second
-	}
-	return t.PingTimeout
-
-}
-
 // ConfigureTransport configures a net/http HTTP/1 Transport to use HTTP/2.
 // It returns an error if t1 has already been HTTP/2-enabled.
 //
@@ -370,11 +355,14 @@ type ClientConn struct {
 	lastActive      time.Time
 	lastIdle        time.Time // time last idle
 	// Settings from peer: (also guarded by wmu)
-	maxFrameSize           uint32
-	maxConcurrentStreams   uint32
-	peerMaxHeaderListSize  uint64
-	peerMaxHeaderTableSize uint32
-	initialWindowSize      uint32
+	maxFrameSize                uint32
+	maxConcurrentStreams        uint32
+	peerMaxHeaderListSize       uint64
+	peerMaxHeaderTableSize      uint32
+	initialWindowSize           uint32
+	initialStreamRecvWindowSize int32
+	readIdleTimeout             time.Duration
+	pingTimeout                 time.Duration
 
 	// reqHeaderMu is a 1-element semaphore channel controlling access to sending new requests.
 	// Write to reqHeaderMu to lock it, read from it to unlock.
@@ -499,6 +487,7 @@ func (cs *clientStream) closeReqBodyLocked() {
 }
 
 type stickyErrWriter struct {
+	group   synctestGroupInterface
 	conn    net.Conn
 	timeout time.Duration
 	err     *error
@@ -508,22 +497,9 @@ func (sew stickyErrWriter) Write(p []byte) (n int, err error) {
 	if *sew.err != nil {
 		return 0, *sew.err
 	}
-	for {
-		if sew.timeout != 0 {
-			sew.conn.SetWriteDeadline(time.Now().Add(sew.timeout))
-		}
-		nn, err := sew.conn.Write(p[n:])
-		n += nn
-		if n < len(p) && nn > 0 && errors.Is(err, os.ErrDeadlineExceeded) {
-			// Keep extending the deadline so long as we're making progress.
-			continue
-		}
-		if sew.timeout != 0 {
-			sew.conn.SetWriteDeadline(time.Time{})
-		}
-		*sew.err = err
-		return n, err
-	}
+	n, err = writeWithByteTimeout(sew.group, sew.conn, sew.timeout, p)
+	*sew.err = err
+	return n, err
 }
 
 // noCachedConnError is the concrete type of ErrNoCachedConn, which
@@ -758,44 +734,36 @@ func (t *Transport) expectContinueTimeout() time.Duration {
 	return t.t1.ExpectContinueTimeout
 }
 
-func (t *Transport) maxDecoderHeaderTableSize() uint32 {
-	if v := t.MaxDecoderHeaderTableSize; v > 0 {
-		return v
-	}
-	return initialHeaderTableSize
-}
-
-func (t *Transport) maxEncoderHeaderTableSize() uint32 {
-	if v := t.MaxEncoderHeaderTableSize; v > 0 {
-		return v
-	}
-	return initialHeaderTableSize
-}
-
 func (t *Transport) NewClientConn(c net.Conn) (*ClientConn, error) {
 	return t.newClientConn(c, t.disableKeepAlives())
 }
 
 func (t *Transport) newClientConn(c net.Conn, singleUse bool) (*ClientConn, error) {
+	conf := configFromTransport(t)
 	cc := &ClientConn{
-		t:                     t,
-		tconn:                 c,
-		readerDone:            make(chan struct{}),
-		nextStreamID:          1,
-		maxFrameSize:          16 << 10,                    // spec default
-		initialWindowSize:     65535,                       // spec default
-		maxConcurrentStreams:  initialMaxConcurrentStreams, // "infinite", per spec. Use a smaller value until we have received server settings.
-		peerMaxHeaderListSize: 0xffffffffffffffff,          // "infinite", per spec. Use 2^64-1 instead.
-		streams:               make(map[uint32]*clientStream),
-		singleUse:             singleUse,
-		wantSettingsAck:       true,
-		pings:                 make(map[[8]byte]chan struct{}),
-		reqHeaderMu:           make(chan struct{}, 1),
-	}
+		t:                           t,
+		tconn:                       c,
+		readerDone:                  make(chan struct{}),
+		nextStreamID:                1,
+		maxFrameSize:                16 << 10, // spec default
+		initialWindowSize:           65535,    // spec default
+		initialStreamRecvWindowSize: conf.MaxUploadBufferPerStream,
+		maxConcurrentStreams:        initialMaxConcurrentStreams, // "infinite", per spec. Use a smaller value until we have received server settings.
+		peerMaxHeaderListSize:       0xffffffffffffffff,          // "infinite", per spec. Use 2^64-1 instead.
+		streams:                     make(map[uint32]*clientStream),
+		singleUse:                   singleUse,
+		wantSettingsAck:             true,
+		readIdleTimeout:             conf.SendPingTimeout,
+		pingTimeout:                 conf.PingTimeout,
+		pings:                       make(map[[8]byte]chan struct{}),
+		reqHeaderMu:                 make(chan struct{}, 1),
+	}
+	var group synctestGroupInterface
 	if t.transportTestHooks != nil {
 		t.markNewGoroutine()
 		t.transportTestHooks.newclientconn(cc)
 		c = cc.tconn
+		group = t.group
 	}
 	if VerboseLogs {
 		t.vlogf("http2: Transport creating client conn %p to %v", cc, c.RemoteAddr())
@@ -807,24 +775,23 @@ func (t *Transport) newClientConn(c net.Conn, singleUse bool) (*ClientConn, erro
 	// TODO: adjust this writer size to account for frame size +
 	// MTU + crypto/tls record padding.
 	cc.bw = bufio.NewWriter(stickyErrWriter{
+		group:   group,
 		conn:    c,
-		timeout: t.WriteByteTimeout,
+		timeout: conf.WriteByteTimeout,
 		err:     &cc.werr,
 	})
 	cc.br = bufio.NewReader(c)
 	cc.fr = NewFramer(cc.bw, cc.br)
-	if t.maxFrameReadSize() != 0 {
-		cc.fr.SetMaxReadFrameSize(t.maxFrameReadSize())
-	}
+	cc.fr.SetMaxReadFrameSize(conf.MaxReadFrameSize)
 	if t.CountError != nil {
 		cc.fr.countError = t.CountError
 	}
-	maxHeaderTableSize := t.maxDecoderHeaderTableSize()
+	maxHeaderTableSize := conf.MaxDecoderHeaderTableSize
 	cc.fr.ReadMetaHeaders = hpack.NewDecoder(maxHeaderTableSize, nil)
 	cc.fr.MaxHeaderListSize = t.maxHeaderListSize()
 
 	cc.henc = hpack.NewEncoder(&cc.hbuf)
-	cc.henc.SetMaxDynamicTableSizeLimit(t.maxEncoderHeaderTableSize())
+	cc.henc.SetMaxDynamicTableSizeLimit(conf.MaxEncoderHeaderTableSize)
 	cc.peerMaxHeaderTableSize = initialHeaderTableSize
 
 	if cs, ok := c.(connectionStater); ok {
@@ -834,11 +801,9 @@ func (t *Transport) newClientConn(c net.Conn, singleUse bool) (*ClientConn, erro
 
 	initialSettings := []Setting{
 		{ID: SettingEnablePush, Val: 0},
-		{ID: SettingInitialWindowSize, Val: transportDefaultStreamFlow},
-	}
-	if max := t.maxFrameReadSize(); max != 0 {
-		initialSettings = append(initialSettings, Setting{ID: SettingMaxFrameSize, Val: max})
+		{ID: SettingInitialWindowSize, Val: uint32(cc.initialStreamRecvWindowSize)},
 	}
+	initialSettings = append(initialSettings, Setting{ID: SettingMaxFrameSize, Val: conf.MaxReadFrameSize})
 	if max := t.maxHeaderListSize(); max != 0 {
 		initialSettings = append(initialSettings, Setting{ID: SettingMaxHeaderListSize, Val: max})
 	}
@@ -848,8 +813,8 @@ func (t *Transport) newClientConn(c net.Conn, singleUse bool) (*ClientConn, erro
 
 	cc.bw.Write(clientPreface)
 	cc.fr.WriteSettings(initialSettings...)
-	cc.fr.WriteWindowUpdate(0, transportDefaultConnFlow)
-	cc.inflow.init(transportDefaultConnFlow + initialWindowSize)
+	cc.fr.WriteWindowUpdate(0, uint32(conf.MaxUploadBufferPerConnection))
+	cc.inflow.init(conf.MaxUploadBufferPerConnection + initialWindowSize)
 	cc.bw.Flush()
 	if cc.werr != nil {
 		cc.Close()
@@ -867,7 +832,7 @@ func (t *Transport) newClientConn(c net.Conn, singleUse bool) (*ClientConn, erro
 }
 
 func (cc *ClientConn) healthCheck() {
-	pingTimeout := cc.t.pingTimeout()
+	pingTimeout := cc.pingTimeout
 	// We don't need to periodically ping in the health check, because the readLoop of ClientConn will
 	// trigger the healthCheck again if there is no frame received.
 	ctx, cancel := cc.t.contextWithTimeout(context.Background(), pingTimeout)
@@ -2199,7 +2164,7 @@ type resAndError struct {
 func (cc *ClientConn) addStreamLocked(cs *clientStream) {
 	cs.flow.add(int32(cc.initialWindowSize))
 	cs.flow.setConnFlow(&cc.flow)
-	cs.inflow.init(transportDefaultStreamFlow)
+	cs.inflow.init(cc.initialStreamRecvWindowSize)
 	cs.ID = cc.nextStreamID
 	cc.nextStreamID += 2
 	cc.streams[cs.ID] = cs
@@ -2345,7 +2310,7 @@ func (cc *ClientConn) countReadFrameError(err error) {
 func (rl *clientConnReadLoop) run() error {
 	cc := rl.cc
 	gotSettings := false
-	readIdleTimeout := cc.t.ReadIdleTimeout
+	readIdleTimeout := cc.readIdleTimeout
 	var t timer
 	if readIdleTimeout != 0 {
 		t = cc.t.afterFunc(readIdleTimeout, cc.healthCheck)
diff --git a/vendor/golang.org/x/net/http2/write.go b/vendor/golang.org/x/net/http2/write.go
index 33f61398..6ff6bee7 100644
--- a/vendor/golang.org/x/net/http2/write.go
+++ b/vendor/golang.org/x/net/http2/write.go
@@ -131,6 +131,16 @@ func (se StreamError) writeFrame(ctx writeContext) error {
 
 func (se StreamError) staysWithinBuffer(max int) bool { return frameHeaderLen+4 <= max }
 
+type writePing struct {
+	data [8]byte
+}
+
+func (w writePing) writeFrame(ctx writeContext) error {
+	return ctx.Framer().WritePing(false, w.data)
+}
+
+func (w writePing) staysWithinBuffer(max int) bool { return frameHeaderLen+len(w.data) <= max }
+
 type writePingAck struct{ pf *PingFrame }
 
 func (w writePingAck) writeFrame(ctx writeContext) error {
diff --git a/vendor/golang.org/x/net/websocket/websocket.go b/vendor/golang.org/x/net/websocket/websocket.go
index 923a5780..ac76165c 100644
--- a/vendor/golang.org/x/net/websocket/websocket.go
+++ b/vendor/golang.org/x/net/websocket/websocket.go
@@ -8,7 +8,7 @@
 // This package currently lacks some features found in an alternative
 // and more actively maintained WebSocket package:
 //
-//	https://pkg.go.dev/nhooyr.io/websocket
+//	https://pkg.go.dev/github.com/coder/websocket
 package websocket // import "golang.org/x/net/websocket"
 
 import (
diff --git a/vendor/golang.org/x/oauth2/token.go b/vendor/golang.org/x/oauth2/token.go
index 5bbb3321..109997d7 100644
--- a/vendor/golang.org/x/oauth2/token.go
+++ b/vendor/golang.org/x/oauth2/token.go
@@ -49,6 +49,13 @@ type Token struct {
 	// mechanisms for that TokenSource will not be used.
 	Expiry time.Time `json:"expiry,omitempty"`
 
+	// ExpiresIn is the OAuth2 wire format "expires_in" field,
+	// which specifies how many seconds later the token expires,
+	// relative to an unknown time base approximately around "now".
+	// It is the application's responsibility to populate
+	// `Expiry` from `ExpiresIn` when required.
+	ExpiresIn int64 `json:"expires_in,omitempty"`
+
 	// raw optionally contains extra metadata from the server
 	// when updating a token.
 	raw interface{}
diff --git a/vendor/golang.org/x/sys/cpu/asm_darwin_x86_gc.s b/vendor/golang.org/x/sys/cpu/asm_darwin_x86_gc.s
new file mode 100644
index 00000000..ec2acfe5
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/asm_darwin_x86_gc.s
@@ -0,0 +1,17 @@
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build darwin && amd64 && gc
+
+#include "textflag.h"
+
+TEXT libc_sysctl_trampoline<>(SB),NOSPLIT,$0-0
+	JMP	libc_sysctl(SB)
+GLOBL	·libc_sysctl_trampoline_addr(SB), RODATA, $8
+DATA	·libc_sysctl_trampoline_addr(SB)/8, $libc_sysctl_trampoline<>(SB)
+
+TEXT libc_sysctlbyname_trampoline<>(SB),NOSPLIT,$0-0
+	JMP	libc_sysctlbyname(SB)
+GLOBL	·libc_sysctlbyname_trampoline_addr(SB), RODATA, $8
+DATA	·libc_sysctlbyname_trampoline_addr(SB)/8, $libc_sysctlbyname_trampoline<>(SB)
diff --git a/vendor/golang.org/x/sys/cpu/cpu_darwin_x86.go b/vendor/golang.org/x/sys/cpu/cpu_darwin_x86.go
new file mode 100644
index 00000000..b838cb9e
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_darwin_x86.go
@@ -0,0 +1,61 @@
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build darwin && amd64 && gc
+
+package cpu
+
+// darwinSupportsAVX512 checks Darwin kernel for AVX512 support via sysctl
+// call (see issue 43089). It also restricts AVX512 support for Darwin to
+// kernel version 21.3.0 (MacOS 12.2.0) or later (see issue 49233).
+//
+// Background:
+// Darwin implements a special mechanism to economize on thread state when
+// AVX512 specific registers are not in use. This scheme minimizes state when
+// preempting threads that haven't yet used any AVX512 instructions, but adds
+// special requirements to check for AVX512 hardware support at runtime (e.g.
+// via sysctl call or commpage inspection). See issue 43089 and link below for
+// full background:
+// https://github.com/apple-oss-distributions/xnu/blob/xnu-11215.1.10/osfmk/i386/fpu.c#L214-L240
+//
+// Additionally, all versions of the Darwin kernel from 19.6.0 through 21.2.0
+// (corresponding to MacOS 10.15.6 - 12.1) have a bug that can cause corruption
+// of the AVX512 mask registers (K0-K7) upon signal return. For this reason
+// AVX512 is considered unsafe to use on Darwin for kernel versions prior to
+// 21.3.0, where a fix has been confirmed. See issue 49233 for full background.
+func darwinSupportsAVX512() bool {
+	return darwinSysctlEnabled([]byte("hw.optional.avx512f\x00")) && darwinKernelVersionCheck(21, 3, 0)
+}
+
+// Ensure Darwin kernel version is at least major.minor.patch, avoiding dependencies
+func darwinKernelVersionCheck(major, minor, patch int) bool {
+	var release [256]byte
+	err := darwinOSRelease(&release)
+	if err != nil {
+		return false
+	}
+
+	var mmp [3]int
+	c := 0
+Loop:
+	for _, b := range release[:] {
+		switch {
+		case b >= '0' && b <= '9':
+			mmp[c] = 10*mmp[c] + int(b-'0')
+		case b == '.':
+			c++
+			if c > 2 {
+				return false
+			}
+		case b == 0:
+			break Loop
+		default:
+			return false
+		}
+	}
+	if c != 2 {
+		return false
+	}
+	return mmp[0] > major || mmp[0] == major && (mmp[1] > minor || mmp[1] == minor && mmp[2] >= patch)
+}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_gc_x86.go b/vendor/golang.org/x/sys/cpu/cpu_gc_x86.go
index 910728fb..32a44514 100644
--- a/vendor/golang.org/x/sys/cpu/cpu_gc_x86.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_gc_x86.go
@@ -6,10 +6,10 @@
 
 package cpu
 
-// cpuid is implemented in cpu_x86.s for gc compiler
+// cpuid is implemented in cpu_gc_x86.s for gc compiler
 // and in cpu_gccgo.c for gccgo.
 func cpuid(eaxArg, ecxArg uint32) (eax, ebx, ecx, edx uint32)
 
-// xgetbv with ecx = 0 is implemented in cpu_x86.s for gc compiler
+// xgetbv with ecx = 0 is implemented in cpu_gc_x86.s for gc compiler
 // and in cpu_gccgo.c for gccgo.
 func xgetbv() (eax, edx uint32)
diff --git a/vendor/golang.org/x/sys/cpu/cpu_x86.s b/vendor/golang.org/x/sys/cpu/cpu_gc_x86.s
similarity index 94%
rename from vendor/golang.org/x/sys/cpu/cpu_x86.s
rename to vendor/golang.org/x/sys/cpu/cpu_gc_x86.s
index 7d7ba33e..ce208ce6 100644
--- a/vendor/golang.org/x/sys/cpu/cpu_x86.s
+++ b/vendor/golang.org/x/sys/cpu/cpu_gc_x86.s
@@ -18,7 +18,7 @@ TEXT ·cpuid(SB), NOSPLIT, $0-24
 	RET
 
 // func xgetbv() (eax, edx uint32)
-TEXT ·xgetbv(SB),NOSPLIT,$0-8
+TEXT ·xgetbv(SB), NOSPLIT, $0-8
 	MOVL $0, CX
 	XGETBV
 	MOVL AX, eax+0(FP)
diff --git a/vendor/golang.org/x/sys/cpu/cpu_gccgo_x86.go b/vendor/golang.org/x/sys/cpu/cpu_gccgo_x86.go
index 99c60fe9..170d21dd 100644
--- a/vendor/golang.org/x/sys/cpu/cpu_gccgo_x86.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_gccgo_x86.go
@@ -23,9 +23,3 @@ func xgetbv() (eax, edx uint32) {
 	gccgoXgetbv(&a, &d)
 	return a, d
 }
-
-// gccgo doesn't build on Darwin, per:
-// https://github.com/Homebrew/homebrew-core/blob/HEAD/Formula/gcc.rb#L76
-func darwinSupportsAVX512() bool {
-	return false
-}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_linux_arm64.go b/vendor/golang.org/x/sys/cpu/cpu_linux_arm64.go
index 08f35ea1..f1caf0f7 100644
--- a/vendor/golang.org/x/sys/cpu/cpu_linux_arm64.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_linux_arm64.go
@@ -110,7 +110,6 @@ func doinit() {
 	ARM64.HasASIMDFHM = isSet(hwCap, hwcap_ASIMDFHM)
 	ARM64.HasDIT = isSet(hwCap, hwcap_DIT)
 
-
 	// HWCAP2 feature bits
 	ARM64.HasSVE2 = isSet(hwCap2, hwcap2_SVE2)
 	ARM64.HasI8MM = isSet(hwCap2, hwcap2_I8MM)
diff --git a/vendor/golang.org/x/sys/cpu/cpu_other_x86.go b/vendor/golang.org/x/sys/cpu/cpu_other_x86.go
new file mode 100644
index 00000000..a0fd7e2f
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/cpu_other_x86.go
@@ -0,0 +1,11 @@
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+//go:build 386 || amd64p32 || (amd64 && (!darwin || !gc))
+
+package cpu
+
+func darwinSupportsAVX512() bool {
+	panic("only implemented for gc && amd64 && darwin")
+}
diff --git a/vendor/golang.org/x/sys/cpu/cpu_x86.go b/vendor/golang.org/x/sys/cpu/cpu_x86.go
index c29f5e4c..600a6807 100644
--- a/vendor/golang.org/x/sys/cpu/cpu_x86.go
+++ b/vendor/golang.org/x/sys/cpu/cpu_x86.go
@@ -92,10 +92,8 @@ func archInit() {
 		osSupportsAVX = isSet(1, eax) && isSet(2, eax)
 
 		if runtime.GOOS == "darwin" {
-			// Darwin doesn't save/restore AVX-512 mask registers correctly across signal handlers.
-			// Since users can't rely on mask register contents, let's not advertise AVX-512 support.
-			// See issue 49233.
-			osSupportsAVX512 = false
+			// Darwin requires special AVX512 checks, see cpu_darwin_x86.go
+			osSupportsAVX512 = osSupportsAVX && darwinSupportsAVX512()
 		} else {
 			// Check if OPMASK and ZMM registers have OS support.
 			osSupportsAVX512 = osSupportsAVX && isSet(5, eax) && isSet(6, eax) && isSet(7, eax)
diff --git a/vendor/golang.org/x/sys/cpu/syscall_darwin_x86_gc.go b/vendor/golang.org/x/sys/cpu/syscall_darwin_x86_gc.go
new file mode 100644
index 00000000..4d0888b0
--- /dev/null
+++ b/vendor/golang.org/x/sys/cpu/syscall_darwin_x86_gc.go
@@ -0,0 +1,98 @@
+// Copyright 2024 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+// Minimal copy of x/sys/unix so the cpu package can make a
+// system call on Darwin without depending on x/sys/unix.
+
+//go:build darwin && amd64 && gc
+
+package cpu
+
+import (
+	"syscall"
+	"unsafe"
+)
+
+type _C_int int32
+
+// adapted from unix.Uname() at x/sys/unix/syscall_darwin.go L419
+func darwinOSRelease(release *[256]byte) error {
+	// from x/sys/unix/zerrors_openbsd_amd64.go
+	const (
+		CTL_KERN       = 0x1
+		KERN_OSRELEASE = 0x2
+	)
+
+	mib := []_C_int{CTL_KERN, KERN_OSRELEASE}
+	n := unsafe.Sizeof(*release)
+
+	return sysctl(mib, &release[0], &n, nil, 0)
+}
+
+type Errno = syscall.Errno
+
+var _zero uintptr // Single-word zero for use when we need a valid pointer to 0 bytes.
+
+// from x/sys/unix/zsyscall_darwin_amd64.go L791-807
+func sysctl(mib []_C_int, old *byte, oldlen *uintptr, new *byte, newlen uintptr) error {
+	var _p0 unsafe.Pointer
+	if len(mib) > 0 {
+		_p0 = unsafe.Pointer(&mib[0])
+	} else {
+		_p0 = unsafe.Pointer(&_zero)
+	}
+	if _, _, err := syscall_syscall6(
+		libc_sysctl_trampoline_addr,
+		uintptr(_p0),
+		uintptr(len(mib)),
+		uintptr(unsafe.Pointer(old)),
+		uintptr(unsafe.Pointer(oldlen)),
+		uintptr(unsafe.Pointer(new)),
+		uintptr(newlen),
+	); err != 0 {
+		return err
+	}
+
+	return nil
+}
+
+var libc_sysctl_trampoline_addr uintptr
+
+// adapted from internal/cpu/cpu_arm64_darwin.go
+func darwinSysctlEnabled(name []byte) bool {
+	out := int32(0)
+	nout := unsafe.Sizeof(out)
+	if ret := sysctlbyname(&name[0], (*byte)(unsafe.Pointer(&out)), &nout, nil, 0); ret != nil {
+		return false
+	}
+	return out > 0
+}
+
+//go:cgo_import_dynamic libc_sysctl sysctl "/usr/lib/libSystem.B.dylib"
+
+var libc_sysctlbyname_trampoline_addr uintptr
+
+// adapted from runtime/sys_darwin.go in the pattern of sysctl() above, as defined in x/sys/unix
+func sysctlbyname(name *byte, old *byte, oldlen *uintptr, new *byte, newlen uintptr) error {
+	if _, _, err := syscall_syscall6(
+		libc_sysctlbyname_trampoline_addr,
+		uintptr(unsafe.Pointer(name)),
+		uintptr(unsafe.Pointer(old)),
+		uintptr(unsafe.Pointer(oldlen)),
+		uintptr(unsafe.Pointer(new)),
+		uintptr(newlen),
+		0,
+	); err != 0 {
+		return err
+	}
+
+	return nil
+}
+
+//go:cgo_import_dynamic libc_sysctlbyname sysctlbyname "/usr/lib/libSystem.B.dylib"
+
+// Implemented in the runtime package (runtime/sys_darwin.go)
+func syscall_syscall6(fn, a1, a2, a3, a4, a5, a6 uintptr) (r1, r2 uintptr, err Errno)
+
+//go:linkname syscall_syscall6 syscall.syscall6
diff --git a/vendor/golang.org/x/sys/unix/ioctl_linux.go b/vendor/golang.org/x/sys/unix/ioctl_linux.go
index dbe680ea..7ca4fa12 100644
--- a/vendor/golang.org/x/sys/unix/ioctl_linux.go
+++ b/vendor/golang.org/x/sys/unix/ioctl_linux.go
@@ -58,6 +58,102 @@ func IoctlGetEthtoolDrvinfo(fd int, ifname string) (*EthtoolDrvinfo, error) {
 	return &value, err
 }
 
+// IoctlGetEthtoolTsInfo fetches ethtool timestamping and PHC
+// association for the network device specified by ifname.
+func IoctlGetEthtoolTsInfo(fd int, ifname string) (*EthtoolTsInfo, error) {
+	ifr, err := NewIfreq(ifname)
+	if err != nil {
+		return nil, err
+	}
+
+	value := EthtoolTsInfo{Cmd: ETHTOOL_GET_TS_INFO}
+	ifrd := ifr.withData(unsafe.Pointer(&value))
+
+	err = ioctlIfreqData(fd, SIOCETHTOOL, &ifrd)
+	return &value, err
+}
+
+// IoctlGetHwTstamp retrieves the hardware timestamping configuration
+// for the network device specified by ifname.
+func IoctlGetHwTstamp(fd int, ifname string) (*HwTstampConfig, error) {
+	ifr, err := NewIfreq(ifname)
+	if err != nil {
+		return nil, err
+	}
+
+	value := HwTstampConfig{}
+	ifrd := ifr.withData(unsafe.Pointer(&value))
+
+	err = ioctlIfreqData(fd, SIOCGHWTSTAMP, &ifrd)
+	return &value, err
+}
+
+// IoctlSetHwTstamp updates the hardware timestamping configuration for
+// the network device specified by ifname.
+func IoctlSetHwTstamp(fd int, ifname string, cfg *HwTstampConfig) error {
+	ifr, err := NewIfreq(ifname)
+	if err != nil {
+		return err
+	}
+	ifrd := ifr.withData(unsafe.Pointer(cfg))
+	return ioctlIfreqData(fd, SIOCSHWTSTAMP, &ifrd)
+}
+
+// FdToClockID derives the clock ID from the file descriptor number
+// - see clock_gettime(3), FD_TO_CLOCKID macros. The resulting ID is
+// suitable for system calls like ClockGettime.
+func FdToClockID(fd int) int32 { return int32((int(^fd) << 3) | 3) }
+
+// IoctlPtpClockGetcaps returns the description of a given PTP device.
+func IoctlPtpClockGetcaps(fd int) (*PtpClockCaps, error) {
+	var value PtpClockCaps
+	err := ioctlPtr(fd, PTP_CLOCK_GETCAPS2, unsafe.Pointer(&value))
+	return &value, err
+}
+
+// IoctlPtpSysOffsetPrecise returns a description of the clock
+// offset compared to the system clock.
+func IoctlPtpSysOffsetPrecise(fd int) (*PtpSysOffsetPrecise, error) {
+	var value PtpSysOffsetPrecise
+	err := ioctlPtr(fd, PTP_SYS_OFFSET_PRECISE2, unsafe.Pointer(&value))
+	return &value, err
+}
+
+// IoctlPtpSysOffsetExtended returns an extended description of the
+// clock offset compared to the system clock. The samples parameter
+// specifies the desired number of measurements.
+func IoctlPtpSysOffsetExtended(fd int, samples uint) (*PtpSysOffsetExtended, error) {
+	value := PtpSysOffsetExtended{Samples: uint32(samples)}
+	err := ioctlPtr(fd, PTP_SYS_OFFSET_EXTENDED2, unsafe.Pointer(&value))
+	return &value, err
+}
+
+// IoctlPtpPinGetfunc returns the configuration of the specified
+// I/O pin on given PTP device.
+func IoctlPtpPinGetfunc(fd int, index uint) (*PtpPinDesc, error) {
+	value := PtpPinDesc{Index: uint32(index)}
+	err := ioctlPtr(fd, PTP_PIN_GETFUNC2, unsafe.Pointer(&value))
+	return &value, err
+}
+
+// IoctlPtpPinSetfunc updates configuration of the specified PTP
+// I/O pin.
+func IoctlPtpPinSetfunc(fd int, pd *PtpPinDesc) error {
+	return ioctlPtr(fd, PTP_PIN_SETFUNC2, unsafe.Pointer(pd))
+}
+
+// IoctlPtpPeroutRequest configures the periodic output mode of the
+// PTP I/O pins.
+func IoctlPtpPeroutRequest(fd int, r *PtpPeroutRequest) error {
+	return ioctlPtr(fd, PTP_PEROUT_REQUEST2, unsafe.Pointer(r))
+}
+
+// IoctlPtpExttsRequest configures the external timestamping mode
+// of the PTP I/O pins.
+func IoctlPtpExttsRequest(fd int, r *PtpExttsRequest) error {
+	return ioctlPtr(fd, PTP_EXTTS_REQUEST2, unsafe.Pointer(r))
+}
+
 // IoctlGetWatchdogInfo fetches information about a watchdog device from the
 // Linux watchdog API. For more information, see:
 // https://www.kernel.org/doc/html/latest/watchdog/watchdog-api.html.
diff --git a/vendor/golang.org/x/sys/unix/mkerrors.sh b/vendor/golang.org/x/sys/unix/mkerrors.sh
index ac54ecab..6ab02b6c 100644
--- a/vendor/golang.org/x/sys/unix/mkerrors.sh
+++ b/vendor/golang.org/x/sys/unix/mkerrors.sh
@@ -158,6 +158,16 @@ includes_Linux='
 #endif
 #define _GNU_SOURCE
 
+// See the description in unix/linux/types.go
+#if defined(__ARM_EABI__) || \
+	(defined(__mips__) && (_MIPS_SIM == _ABIO32)) || \
+	(defined(__powerpc__) && (!defined(__powerpc64__)))
+# ifdef   _TIME_BITS
+#  undef  _TIME_BITS
+# endif
+# define  _TIME_BITS 32
+#endif
+
 // <sys/ioctl.h> is broken on powerpc64, as it fails to include definitions of
 // these structures. We just include them copied from <bits/termios.h>.
 #if defined(__powerpc__)
@@ -256,6 +266,7 @@ struct ltchars {
 #include <linux/nsfs.h>
 #include <linux/perf_event.h>
 #include <linux/pps.h>
+#include <linux/ptp_clock.h>
 #include <linux/ptrace.h>
 #include <linux/random.h>
 #include <linux/reboot.h>
@@ -527,6 +538,7 @@ ccflags="$@"
 		$2 ~ /^(AF|SOCK|SO|SOL|IPPROTO|IP|IPV6|TCP|MCAST|EVFILT|NOTE|SHUT|PROT|MAP|MREMAP|MFD|T?PACKET|MSG|SCM|MCL|DT|MADV|PR|LOCAL|TCPOPT|UDP)_/ ||
 		$2 ~ /^NFC_(GENL|PROTO|COMM|RF|SE|DIRECTION|LLCP|SOCKPROTO)_/ ||
 		$2 ~ /^NFC_.*_(MAX)?SIZE$/ ||
+		$2 ~ /^PTP_/ ||
 		$2 ~ /^RAW_PAYLOAD_/ ||
 		$2 ~ /^[US]F_/ ||
 		$2 ~ /^TP_STATUS_/ ||
diff --git a/vendor/golang.org/x/sys/unix/syscall_linux.go b/vendor/golang.org/x/sys/unix/syscall_linux.go
index f08abd43..230a9454 100644
--- a/vendor/golang.org/x/sys/unix/syscall_linux.go
+++ b/vendor/golang.org/x/sys/unix/syscall_linux.go
@@ -1860,6 +1860,7 @@ func Sendfile(outfd int, infd int, offset *int64, count int) (written int, err e
 //sys	ClockAdjtime(clockid int32, buf *Timex) (state int, err error)
 //sys	ClockGetres(clockid int32, res *Timespec) (err error)
 //sys	ClockGettime(clockid int32, time *Timespec) (err error)
+//sys	ClockSettime(clockid int32, time *Timespec) (err error)
 //sys	ClockNanosleep(clockid int32, flags int, request *Timespec, remain *Timespec) (err error)
 //sys	Close(fd int) (err error)
 //sys	CloseRange(first uint, last uint, flags uint) (err error)
diff --git a/vendor/golang.org/x/sys/unix/syscall_zos_s390x.go b/vendor/golang.org/x/sys/unix/syscall_zos_s390x.go
index 312ae6ac..7bf5c04b 100644
--- a/vendor/golang.org/x/sys/unix/syscall_zos_s390x.go
+++ b/vendor/golang.org/x/sys/unix/syscall_zos_s390x.go
@@ -768,6 +768,15 @@ func Munmap(b []byte) (err error) {
 	return mapper.Munmap(b)
 }
 
+func MmapPtr(fd int, offset int64, addr unsafe.Pointer, length uintptr, prot int, flags int) (ret unsafe.Pointer, err error) {
+	xaddr, err := mapper.mmap(uintptr(addr), length, prot, flags, fd, offset)
+	return unsafe.Pointer(xaddr), err
+}
+
+func MunmapPtr(addr unsafe.Pointer, length uintptr) (err error) {
+	return mapper.munmap(uintptr(addr), length)
+}
+
 //sys   Gethostname(buf []byte) (err error) = SYS___GETHOSTNAME_A
 //sysnb	Getgid() (gid int)
 //sysnb	Getpid() (pid int)
@@ -816,10 +825,10 @@ func Lstat(path string, stat *Stat_t) (err error) {
 // for checking symlinks begins with $VERSION/ $SYSNAME/ $SYSSYMR/ $SYSSYMA/
 func isSpecialPath(path []byte) (v bool) {
 	var special = [4][8]byte{
-		[8]byte{'V', 'E', 'R', 'S', 'I', 'O', 'N', '/'},
-		[8]byte{'S', 'Y', 'S', 'N', 'A', 'M', 'E', '/'},
-		[8]byte{'S', 'Y', 'S', 'S', 'Y', 'M', 'R', '/'},
-		[8]byte{'S', 'Y', 'S', 'S', 'Y', 'M', 'A', '/'}}
+		{'V', 'E', 'R', 'S', 'I', 'O', 'N', '/'},
+		{'S', 'Y', 'S', 'N', 'A', 'M', 'E', '/'},
+		{'S', 'Y', 'S', 'S', 'Y', 'M', 'R', '/'},
+		{'S', 'Y', 'S', 'S', 'Y', 'M', 'A', '/'}}
 
 	var i, j int
 	for i = 0; i < len(special); i++ {
@@ -3115,3 +3124,90 @@ func legacy_Mkfifoat(dirfd int, path string, mode uint32) (err error) {
 //sys	Posix_openpt(oflag int) (fd int, err error) = SYS_POSIX_OPENPT
 //sys	Grantpt(fildes int) (rc int, err error) = SYS_GRANTPT
 //sys	Unlockpt(fildes int) (rc int, err error) = SYS_UNLOCKPT
+
+func fcntlAsIs(fd uintptr, cmd int, arg uintptr) (val int, err error) {
+	runtime.EnterSyscall()
+	r0, e2, e1 := CallLeFuncWithErr(GetZosLibVec()+SYS_FCNTL<<4, uintptr(fd), uintptr(cmd), arg)
+	runtime.ExitSyscall()
+	val = int(r0)
+	if int64(r0) == -1 {
+		err = errnoErr2(e1, e2)
+	}
+	return
+}
+
+func Fcntl(fd uintptr, cmd int, op interface{}) (ret int, err error) {
+	switch op.(type) {
+	case *Flock_t:
+		err = FcntlFlock(fd, cmd, op.(*Flock_t))
+		if err != nil {
+			ret = -1
+		}
+		return
+	case int:
+		return FcntlInt(fd, cmd, op.(int))
+	case *F_cnvrt:
+		return fcntlAsIs(fd, cmd, uintptr(unsafe.Pointer(op.(*F_cnvrt))))
+	case unsafe.Pointer:
+		return fcntlAsIs(fd, cmd, uintptr(op.(unsafe.Pointer)))
+	default:
+		return -1, EINVAL
+	}
+	return
+}
+
+func Sendfile(outfd int, infd int, offset *int64, count int) (written int, err error) {
+	if raceenabled {
+		raceReleaseMerge(unsafe.Pointer(&ioSync))
+	}
+	return sendfile(outfd, infd, offset, count)
+}
+
+func sendfile(outfd int, infd int, offset *int64, count int) (written int, err error) {
+	// TODO: use LE call instead if the call is implemented
+	originalOffset, err := Seek(infd, 0, SEEK_CUR)
+	if err != nil {
+		return -1, err
+	}
+	//start reading data from in_fd
+	if offset != nil {
+		_, err := Seek(infd, *offset, SEEK_SET)
+		if err != nil {
+			return -1, err
+		}
+	}
+
+	buf := make([]byte, count)
+	readBuf := make([]byte, 0)
+	var n int = 0
+	for i := 0; i < count; i += n {
+		n, err := Read(infd, buf)
+		if n == 0 {
+			if err != nil {
+				return -1, err
+			} else { // EOF
+				break
+			}
+		}
+		readBuf = append(readBuf, buf...)
+		buf = buf[0:0]
+	}
+
+	n2, err := Write(outfd, readBuf)
+	if err != nil {
+		return -1, err
+	}
+
+	//When sendfile() returns, this variable will be set to the
+	// offset of the byte following the last byte that was read.
+	if offset != nil {
+		*offset = *offset + int64(n)
+		// If offset is not NULL, then sendfile() does not modify the file
+		// offset of in_fd
+		_, err := Seek(infd, originalOffset, SEEK_SET)
+		if err != nil {
+			return -1, err
+		}
+	}
+	return n2, nil
+}
diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux.go b/vendor/golang.org/x/sys/unix/zerrors_linux.go
index de3b4624..6ebc48b3 100644
--- a/vendor/golang.org/x/sys/unix/zerrors_linux.go
+++ b/vendor/golang.org/x/sys/unix/zerrors_linux.go
@@ -321,6 +321,9 @@ const (
 	AUDIT_INTEGRITY_STATUS                      = 0x70a
 	AUDIT_IPC                                   = 0x517
 	AUDIT_IPC_SET_PERM                          = 0x51f
+	AUDIT_IPE_ACCESS                            = 0x58c
+	AUDIT_IPE_CONFIG_CHANGE                     = 0x58d
+	AUDIT_IPE_POLICY_LOAD                       = 0x58e
 	AUDIT_KERNEL                                = 0x7d0
 	AUDIT_KERNEL_OTHER                          = 0x524
 	AUDIT_KERN_MODULE                           = 0x532
@@ -489,6 +492,7 @@ const (
 	BPF_F_ID                                    = 0x20
 	BPF_F_NETFILTER_IP_DEFRAG                   = 0x1
 	BPF_F_QUERY_EFFECTIVE                       = 0x1
+	BPF_F_REDIRECT_FLAGS                        = 0x19
 	BPF_F_REPLACE                               = 0x4
 	BPF_F_SLEEPABLE                             = 0x10
 	BPF_F_STRICT_ALIGNMENT                      = 0x1
@@ -1166,6 +1170,7 @@ const (
 	EXTA                                        = 0xe
 	EXTB                                        = 0xf
 	F2FS_SUPER_MAGIC                            = 0xf2f52010
+	FALLOC_FL_ALLOCATE_RANGE                    = 0x0
 	FALLOC_FL_COLLAPSE_RANGE                    = 0x8
 	FALLOC_FL_INSERT_RANGE                      = 0x20
 	FALLOC_FL_KEEP_SIZE                         = 0x1
@@ -1799,6 +1804,8 @@ const (
 	LANDLOCK_ACCESS_NET_BIND_TCP                = 0x1
 	LANDLOCK_ACCESS_NET_CONNECT_TCP             = 0x2
 	LANDLOCK_CREATE_RULESET_VERSION             = 0x1
+	LANDLOCK_SCOPE_ABSTRACT_UNIX_SOCKET         = 0x1
+	LANDLOCK_SCOPE_SIGNAL                       = 0x2
 	LINUX_REBOOT_CMD_CAD_OFF                    = 0x0
 	LINUX_REBOOT_CMD_CAD_ON                     = 0x89abcdef
 	LINUX_REBOOT_CMD_HALT                       = 0xcdef0123
@@ -1924,6 +1931,7 @@ const (
 	MNT_FORCE                                   = 0x1
 	MNT_ID_REQ_SIZE_VER0                        = 0x18
 	MNT_ID_REQ_SIZE_VER1                        = 0x20
+	MNT_NS_INFO_SIZE_VER0                       = 0x10
 	MODULE_INIT_COMPRESSED_FILE                 = 0x4
 	MODULE_INIT_IGNORE_MODVERSIONS              = 0x1
 	MODULE_INIT_IGNORE_VERMAGIC                 = 0x2
@@ -2625,6 +2633,28 @@ const (
 	PR_UNALIGN_NOPRINT                          = 0x1
 	PR_UNALIGN_SIGBUS                           = 0x2
 	PSTOREFS_MAGIC                              = 0x6165676c
+	PTP_CLK_MAGIC                               = '='
+	PTP_ENABLE_FEATURE                          = 0x1
+	PTP_EXTTS_EDGES                             = 0x6
+	PTP_EXTTS_EVENT_VALID                       = 0x1
+	PTP_EXTTS_V1_VALID_FLAGS                    = 0x7
+	PTP_EXTTS_VALID_FLAGS                       = 0x1f
+	PTP_EXT_OFFSET                              = 0x10
+	PTP_FALLING_EDGE                            = 0x4
+	PTP_MAX_SAMPLES                             = 0x19
+	PTP_PEROUT_DUTY_CYCLE                       = 0x2
+	PTP_PEROUT_ONE_SHOT                         = 0x1
+	PTP_PEROUT_PHASE                            = 0x4
+	PTP_PEROUT_V1_VALID_FLAGS                   = 0x0
+	PTP_PEROUT_VALID_FLAGS                      = 0x7
+	PTP_PIN_GETFUNC                             = 0xc0603d06
+	PTP_PIN_GETFUNC2                            = 0xc0603d0f
+	PTP_RISING_EDGE                             = 0x2
+	PTP_STRICT_FLAGS                            = 0x8
+	PTP_SYS_OFFSET_EXTENDED                     = 0xc4c03d09
+	PTP_SYS_OFFSET_EXTENDED2                    = 0xc4c03d12
+	PTP_SYS_OFFSET_PRECISE                      = 0xc0403d08
+	PTP_SYS_OFFSET_PRECISE2                     = 0xc0403d11
 	PTRACE_ATTACH                               = 0x10
 	PTRACE_CONT                                 = 0x7
 	PTRACE_DETACH                               = 0x11
@@ -2948,6 +2978,7 @@ const (
 	RWF_WRITE_LIFE_NOT_SET                      = 0x0
 	SCHED_BATCH                                 = 0x3
 	SCHED_DEADLINE                              = 0x6
+	SCHED_EXT                                   = 0x7
 	SCHED_FIFO                                  = 0x1
 	SCHED_FLAG_ALL                              = 0x7f
 	SCHED_FLAG_DL_OVERRUN                       = 0x4
diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_386.go b/vendor/golang.org/x/sys/unix/zerrors_linux_386.go
index 8aa6d77c..c0d45e32 100644
--- a/vendor/golang.org/x/sys/unix/zerrors_linux_386.go
+++ b/vendor/golang.org/x/sys/unix/zerrors_linux_386.go
@@ -109,6 +109,7 @@ const (
 	HIDIOCGRAWINFO                   = 0x80084803
 	HIDIOCGRDESC                     = 0x90044802
 	HIDIOCGRDESCSIZE                 = 0x80044801
+	HIDIOCREVOKE                     = 0x4004480d
 	HUPCL                            = 0x400
 	ICANON                           = 0x2
 	IEXTEN                           = 0x8000
@@ -237,6 +238,20 @@ const (
 	PPPIOCUNBRIDGECHAN               = 0x7434
 	PPPIOCXFERUNIT                   = 0x744e
 	PR_SET_PTRACER_ANY               = 0xffffffff
+	PTP_CLOCK_GETCAPS                = 0x80503d01
+	PTP_CLOCK_GETCAPS2               = 0x80503d0a
+	PTP_ENABLE_PPS                   = 0x40043d04
+	PTP_ENABLE_PPS2                  = 0x40043d0d
+	PTP_EXTTS_REQUEST                = 0x40103d02
+	PTP_EXTTS_REQUEST2               = 0x40103d0b
+	PTP_MASK_CLEAR_ALL               = 0x3d13
+	PTP_MASK_EN_SINGLE               = 0x40043d14
+	PTP_PEROUT_REQUEST               = 0x40383d03
+	PTP_PEROUT_REQUEST2              = 0x40383d0c
+	PTP_PIN_SETFUNC                  = 0x40603d07
+	PTP_PIN_SETFUNC2                 = 0x40603d10
+	PTP_SYS_OFFSET                   = 0x43403d05
+	PTP_SYS_OFFSET2                  = 0x43403d0e
 	PTRACE_GETFPREGS                 = 0xe
 	PTRACE_GETFPXREGS                = 0x12
 	PTRACE_GET_THREAD_AREA           = 0x19
@@ -283,6 +298,8 @@ const (
 	RTC_WIE_ON                       = 0x700f
 	RTC_WKALM_RD                     = 0x80287010
 	RTC_WKALM_SET                    = 0x4028700f
+	SCM_DEVMEM_DMABUF                = 0x4f
+	SCM_DEVMEM_LINEAR                = 0x4e
 	SCM_TIMESTAMPING                 = 0x25
 	SCM_TIMESTAMPING_OPT_STATS       = 0x36
 	SCM_TIMESTAMPING_PKTINFO         = 0x3a
@@ -321,6 +338,9 @@ const (
 	SO_CNX_ADVICE                    = 0x35
 	SO_COOKIE                        = 0x39
 	SO_DETACH_REUSEPORT_BPF          = 0x44
+	SO_DEVMEM_DMABUF                 = 0x4f
+	SO_DEVMEM_DONTNEED               = 0x50
+	SO_DEVMEM_LINEAR                 = 0x4e
 	SO_DOMAIN                        = 0x27
 	SO_DONTROUTE                     = 0x5
 	SO_ERROR                         = 0x4
diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_amd64.go b/vendor/golang.org/x/sys/unix/zerrors_linux_amd64.go
index da428f42..c731d24f 100644
--- a/vendor/golang.org/x/sys/unix/zerrors_linux_amd64.go
+++ b/vendor/golang.org/x/sys/unix/zerrors_linux_amd64.go
@@ -109,6 +109,7 @@ const (
 	HIDIOCGRAWINFO                   = 0x80084803
 	HIDIOCGRDESC                     = 0x90044802
 	HIDIOCGRDESCSIZE                 = 0x80044801
+	HIDIOCREVOKE                     = 0x4004480d
 	HUPCL                            = 0x400
 	ICANON                           = 0x2
 	IEXTEN                           = 0x8000
@@ -237,6 +238,20 @@ const (
 	PPPIOCUNBRIDGECHAN               = 0x7434
 	PPPIOCXFERUNIT                   = 0x744e
 	PR_SET_PTRACER_ANY               = 0xffffffffffffffff
+	PTP_CLOCK_GETCAPS                = 0x80503d01
+	PTP_CLOCK_GETCAPS2               = 0x80503d0a
+	PTP_ENABLE_PPS                   = 0x40043d04
+	PTP_ENABLE_PPS2                  = 0x40043d0d
+	PTP_EXTTS_REQUEST                = 0x40103d02
+	PTP_EXTTS_REQUEST2               = 0x40103d0b
+	PTP_MASK_CLEAR_ALL               = 0x3d13
+	PTP_MASK_EN_SINGLE               = 0x40043d14
+	PTP_PEROUT_REQUEST               = 0x40383d03
+	PTP_PEROUT_REQUEST2              = 0x40383d0c
+	PTP_PIN_SETFUNC                  = 0x40603d07
+	PTP_PIN_SETFUNC2                 = 0x40603d10
+	PTP_SYS_OFFSET                   = 0x43403d05
+	PTP_SYS_OFFSET2                  = 0x43403d0e
 	PTRACE_ARCH_PRCTL                = 0x1e
 	PTRACE_GETFPREGS                 = 0xe
 	PTRACE_GETFPXREGS                = 0x12
@@ -284,6 +299,8 @@ const (
 	RTC_WIE_ON                       = 0x700f
 	RTC_WKALM_RD                     = 0x80287010
 	RTC_WKALM_SET                    = 0x4028700f
+	SCM_DEVMEM_DMABUF                = 0x4f
+	SCM_DEVMEM_LINEAR                = 0x4e
 	SCM_TIMESTAMPING                 = 0x25
 	SCM_TIMESTAMPING_OPT_STATS       = 0x36
 	SCM_TIMESTAMPING_PKTINFO         = 0x3a
@@ -322,6 +339,9 @@ const (
 	SO_CNX_ADVICE                    = 0x35
 	SO_COOKIE                        = 0x39
 	SO_DETACH_REUSEPORT_BPF          = 0x44
+	SO_DEVMEM_DMABUF                 = 0x4f
+	SO_DEVMEM_DONTNEED               = 0x50
+	SO_DEVMEM_LINEAR                 = 0x4e
 	SO_DOMAIN                        = 0x27
 	SO_DONTROUTE                     = 0x5
 	SO_ERROR                         = 0x4
diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_arm.go b/vendor/golang.org/x/sys/unix/zerrors_linux_arm.go
index bf45bfec..680018a4 100644
--- a/vendor/golang.org/x/sys/unix/zerrors_linux_arm.go
+++ b/vendor/golang.org/x/sys/unix/zerrors_linux_arm.go
@@ -108,6 +108,7 @@ const (
 	HIDIOCGRAWINFO                   = 0x80084803
 	HIDIOCGRDESC                     = 0x90044802
 	HIDIOCGRDESCSIZE                 = 0x80044801
+	HIDIOCREVOKE                     = 0x4004480d
 	HUPCL                            = 0x400
 	ICANON                           = 0x2
 	IEXTEN                           = 0x8000
@@ -234,6 +235,20 @@ const (
 	PPPIOCUNBRIDGECHAN               = 0x7434
 	PPPIOCXFERUNIT                   = 0x744e
 	PR_SET_PTRACER_ANY               = 0xffffffff
+	PTP_CLOCK_GETCAPS                = 0x80503d01
+	PTP_CLOCK_GETCAPS2               = 0x80503d0a
+	PTP_ENABLE_PPS                   = 0x40043d04
+	PTP_ENABLE_PPS2                  = 0x40043d0d
+	PTP_EXTTS_REQUEST                = 0x40103d02
+	PTP_EXTTS_REQUEST2               = 0x40103d0b
+	PTP_MASK_CLEAR_ALL               = 0x3d13
+	PTP_MASK_EN_SINGLE               = 0x40043d14
+	PTP_PEROUT_REQUEST               = 0x40383d03
+	PTP_PEROUT_REQUEST2              = 0x40383d0c
+	PTP_PIN_SETFUNC                  = 0x40603d07
+	PTP_PIN_SETFUNC2                 = 0x40603d10
+	PTP_SYS_OFFSET                   = 0x43403d05
+	PTP_SYS_OFFSET2                  = 0x43403d0e
 	PTRACE_GETCRUNCHREGS             = 0x19
 	PTRACE_GETFDPIC                  = 0x1f
 	PTRACE_GETFDPIC_EXEC             = 0x0
@@ -289,6 +304,8 @@ const (
 	RTC_WIE_ON                       = 0x700f
 	RTC_WKALM_RD                     = 0x80287010
 	RTC_WKALM_SET                    = 0x4028700f
+	SCM_DEVMEM_DMABUF                = 0x4f
+	SCM_DEVMEM_LINEAR                = 0x4e
 	SCM_TIMESTAMPING                 = 0x25
 	SCM_TIMESTAMPING_OPT_STATS       = 0x36
 	SCM_TIMESTAMPING_PKTINFO         = 0x3a
@@ -327,6 +344,9 @@ const (
 	SO_CNX_ADVICE                    = 0x35
 	SO_COOKIE                        = 0x39
 	SO_DETACH_REUSEPORT_BPF          = 0x44
+	SO_DEVMEM_DMABUF                 = 0x4f
+	SO_DEVMEM_DONTNEED               = 0x50
+	SO_DEVMEM_LINEAR                 = 0x4e
 	SO_DOMAIN                        = 0x27
 	SO_DONTROUTE                     = 0x5
 	SO_ERROR                         = 0x4
diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_arm64.go b/vendor/golang.org/x/sys/unix/zerrors_linux_arm64.go
index 71c67162..a63909f3 100644
--- a/vendor/golang.org/x/sys/unix/zerrors_linux_arm64.go
+++ b/vendor/golang.org/x/sys/unix/zerrors_linux_arm64.go
@@ -112,6 +112,7 @@ const (
 	HIDIOCGRAWINFO                   = 0x80084803
 	HIDIOCGRDESC                     = 0x90044802
 	HIDIOCGRDESCSIZE                 = 0x80044801
+	HIDIOCREVOKE                     = 0x4004480d
 	HUPCL                            = 0x400
 	ICANON                           = 0x2
 	IEXTEN                           = 0x8000
@@ -205,6 +206,7 @@ const (
 	PERF_EVENT_IOC_SET_BPF           = 0x40042408
 	PERF_EVENT_IOC_SET_FILTER        = 0x40082406
 	PERF_EVENT_IOC_SET_OUTPUT        = 0x2405
+	POE_MAGIC                        = 0x504f4530
 	PPPIOCATTACH                     = 0x4004743d
 	PPPIOCATTCHAN                    = 0x40047438
 	PPPIOCBRIDGECHAN                 = 0x40047435
@@ -240,6 +242,20 @@ const (
 	PROT_BTI                         = 0x10
 	PROT_MTE                         = 0x20
 	PR_SET_PTRACER_ANY               = 0xffffffffffffffff
+	PTP_CLOCK_GETCAPS                = 0x80503d01
+	PTP_CLOCK_GETCAPS2               = 0x80503d0a
+	PTP_ENABLE_PPS                   = 0x40043d04
+	PTP_ENABLE_PPS2                  = 0x40043d0d
+	PTP_EXTTS_REQUEST                = 0x40103d02
+	PTP_EXTTS_REQUEST2               = 0x40103d0b
+	PTP_MASK_CLEAR_ALL               = 0x3d13
+	PTP_MASK_EN_SINGLE               = 0x40043d14
+	PTP_PEROUT_REQUEST               = 0x40383d03
+	PTP_PEROUT_REQUEST2              = 0x40383d0c
+	PTP_PIN_SETFUNC                  = 0x40603d07
+	PTP_PIN_SETFUNC2                 = 0x40603d10
+	PTP_SYS_OFFSET                   = 0x43403d05
+	PTP_SYS_OFFSET2                  = 0x43403d0e
 	PTRACE_PEEKMTETAGS               = 0x21
 	PTRACE_POKEMTETAGS               = 0x22
 	PTRACE_SYSEMU                    = 0x1f
@@ -280,6 +296,8 @@ const (
 	RTC_WIE_ON                       = 0x700f
 	RTC_WKALM_RD                     = 0x80287010
 	RTC_WKALM_SET                    = 0x4028700f
+	SCM_DEVMEM_DMABUF                = 0x4f
+	SCM_DEVMEM_LINEAR                = 0x4e
 	SCM_TIMESTAMPING                 = 0x25
 	SCM_TIMESTAMPING_OPT_STATS       = 0x36
 	SCM_TIMESTAMPING_PKTINFO         = 0x3a
@@ -318,6 +336,9 @@ const (
 	SO_CNX_ADVICE                    = 0x35
 	SO_COOKIE                        = 0x39
 	SO_DETACH_REUSEPORT_BPF          = 0x44
+	SO_DEVMEM_DMABUF                 = 0x4f
+	SO_DEVMEM_DONTNEED               = 0x50
+	SO_DEVMEM_LINEAR                 = 0x4e
 	SO_DOMAIN                        = 0x27
 	SO_DONTROUTE                     = 0x5
 	SO_ERROR                         = 0x4
diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_loong64.go b/vendor/golang.org/x/sys/unix/zerrors_linux_loong64.go
index 9476628f..9b0a2573 100644
--- a/vendor/golang.org/x/sys/unix/zerrors_linux_loong64.go
+++ b/vendor/golang.org/x/sys/unix/zerrors_linux_loong64.go
@@ -109,6 +109,7 @@ const (
 	HIDIOCGRAWINFO                   = 0x80084803
 	HIDIOCGRDESC                     = 0x90044802
 	HIDIOCGRDESCSIZE                 = 0x80044801
+	HIDIOCREVOKE                     = 0x4004480d
 	HUPCL                            = 0x400
 	ICANON                           = 0x2
 	IEXTEN                           = 0x8000
@@ -238,6 +239,20 @@ const (
 	PPPIOCUNBRIDGECHAN               = 0x7434
 	PPPIOCXFERUNIT                   = 0x744e
 	PR_SET_PTRACER_ANY               = 0xffffffffffffffff
+	PTP_CLOCK_GETCAPS                = 0x80503d01
+	PTP_CLOCK_GETCAPS2               = 0x80503d0a
+	PTP_ENABLE_PPS                   = 0x40043d04
+	PTP_ENABLE_PPS2                  = 0x40043d0d
+	PTP_EXTTS_REQUEST                = 0x40103d02
+	PTP_EXTTS_REQUEST2               = 0x40103d0b
+	PTP_MASK_CLEAR_ALL               = 0x3d13
+	PTP_MASK_EN_SINGLE               = 0x40043d14
+	PTP_PEROUT_REQUEST               = 0x40383d03
+	PTP_PEROUT_REQUEST2              = 0x40383d0c
+	PTP_PIN_SETFUNC                  = 0x40603d07
+	PTP_PIN_SETFUNC2                 = 0x40603d10
+	PTP_SYS_OFFSET                   = 0x43403d05
+	PTP_SYS_OFFSET2                  = 0x43403d0e
 	PTRACE_SYSEMU                    = 0x1f
 	PTRACE_SYSEMU_SINGLESTEP         = 0x20
 	RLIMIT_AS                        = 0x9
@@ -276,6 +291,8 @@ const (
 	RTC_WIE_ON                       = 0x700f
 	RTC_WKALM_RD                     = 0x80287010
 	RTC_WKALM_SET                    = 0x4028700f
+	SCM_DEVMEM_DMABUF                = 0x4f
+	SCM_DEVMEM_LINEAR                = 0x4e
 	SCM_TIMESTAMPING                 = 0x25
 	SCM_TIMESTAMPING_OPT_STATS       = 0x36
 	SCM_TIMESTAMPING_PKTINFO         = 0x3a
@@ -314,6 +331,9 @@ const (
 	SO_CNX_ADVICE                    = 0x35
 	SO_COOKIE                        = 0x39
 	SO_DETACH_REUSEPORT_BPF          = 0x44
+	SO_DEVMEM_DMABUF                 = 0x4f
+	SO_DEVMEM_DONTNEED               = 0x50
+	SO_DEVMEM_LINEAR                 = 0x4e
 	SO_DOMAIN                        = 0x27
 	SO_DONTROUTE                     = 0x5
 	SO_ERROR                         = 0x4
diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_mips.go b/vendor/golang.org/x/sys/unix/zerrors_linux_mips.go
index b9e85f3c..958e6e06 100644
--- a/vendor/golang.org/x/sys/unix/zerrors_linux_mips.go
+++ b/vendor/golang.org/x/sys/unix/zerrors_linux_mips.go
@@ -108,6 +108,7 @@ const (
 	HIDIOCGRAWINFO                   = 0x40084803
 	HIDIOCGRDESC                     = 0x50044802
 	HIDIOCGRDESCSIZE                 = 0x40044801
+	HIDIOCREVOKE                     = 0x8004480d
 	HUPCL                            = 0x400
 	ICANON                           = 0x2
 	IEXTEN                           = 0x100
@@ -234,6 +235,20 @@ const (
 	PPPIOCUNBRIDGECHAN               = 0x20007434
 	PPPIOCXFERUNIT                   = 0x2000744e
 	PR_SET_PTRACER_ANY               = 0xffffffff
+	PTP_CLOCK_GETCAPS                = 0x40503d01
+	PTP_CLOCK_GETCAPS2               = 0x40503d0a
+	PTP_ENABLE_PPS                   = 0x80043d04
+	PTP_ENABLE_PPS2                  = 0x80043d0d
+	PTP_EXTTS_REQUEST                = 0x80103d02
+	PTP_EXTTS_REQUEST2               = 0x80103d0b
+	PTP_MASK_CLEAR_ALL               = 0x20003d13
+	PTP_MASK_EN_SINGLE               = 0x80043d14
+	PTP_PEROUT_REQUEST               = 0x80383d03
+	PTP_PEROUT_REQUEST2              = 0x80383d0c
+	PTP_PIN_SETFUNC                  = 0x80603d07
+	PTP_PIN_SETFUNC2                 = 0x80603d10
+	PTP_SYS_OFFSET                   = 0x83403d05
+	PTP_SYS_OFFSET2                  = 0x83403d0e
 	PTRACE_GETFPREGS                 = 0xe
 	PTRACE_GET_THREAD_AREA           = 0x19
 	PTRACE_GET_THREAD_AREA_3264      = 0xc4
@@ -282,6 +297,8 @@ const (
 	RTC_WIE_ON                       = 0x2000700f
 	RTC_WKALM_RD                     = 0x40287010
 	RTC_WKALM_SET                    = 0x8028700f
+	SCM_DEVMEM_DMABUF                = 0x4f
+	SCM_DEVMEM_LINEAR                = 0x4e
 	SCM_TIMESTAMPING                 = 0x25
 	SCM_TIMESTAMPING_OPT_STATS       = 0x36
 	SCM_TIMESTAMPING_PKTINFO         = 0x3a
@@ -320,6 +337,9 @@ const (
 	SO_CNX_ADVICE                    = 0x35
 	SO_COOKIE                        = 0x39
 	SO_DETACH_REUSEPORT_BPF          = 0x44
+	SO_DEVMEM_DMABUF                 = 0x4f
+	SO_DEVMEM_DONTNEED               = 0x50
+	SO_DEVMEM_LINEAR                 = 0x4e
 	SO_DOMAIN                        = 0x1029
 	SO_DONTROUTE                     = 0x10
 	SO_ERROR                         = 0x1007
diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_mips64.go b/vendor/golang.org/x/sys/unix/zerrors_linux_mips64.go
index a48b68a7..50c7f25b 100644
--- a/vendor/golang.org/x/sys/unix/zerrors_linux_mips64.go
+++ b/vendor/golang.org/x/sys/unix/zerrors_linux_mips64.go
@@ -108,6 +108,7 @@ const (
 	HIDIOCGRAWINFO                   = 0x40084803
 	HIDIOCGRDESC                     = 0x50044802
 	HIDIOCGRDESCSIZE                 = 0x40044801
+	HIDIOCREVOKE                     = 0x8004480d
 	HUPCL                            = 0x400
 	ICANON                           = 0x2
 	IEXTEN                           = 0x100
@@ -234,6 +235,20 @@ const (
 	PPPIOCUNBRIDGECHAN               = 0x20007434
 	PPPIOCXFERUNIT                   = 0x2000744e
 	PR_SET_PTRACER_ANY               = 0xffffffffffffffff
+	PTP_CLOCK_GETCAPS                = 0x40503d01
+	PTP_CLOCK_GETCAPS2               = 0x40503d0a
+	PTP_ENABLE_PPS                   = 0x80043d04
+	PTP_ENABLE_PPS2                  = 0x80043d0d
+	PTP_EXTTS_REQUEST                = 0x80103d02
+	PTP_EXTTS_REQUEST2               = 0x80103d0b
+	PTP_MASK_CLEAR_ALL               = 0x20003d13
+	PTP_MASK_EN_SINGLE               = 0x80043d14
+	PTP_PEROUT_REQUEST               = 0x80383d03
+	PTP_PEROUT_REQUEST2              = 0x80383d0c
+	PTP_PIN_SETFUNC                  = 0x80603d07
+	PTP_PIN_SETFUNC2                 = 0x80603d10
+	PTP_SYS_OFFSET                   = 0x83403d05
+	PTP_SYS_OFFSET2                  = 0x83403d0e
 	PTRACE_GETFPREGS                 = 0xe
 	PTRACE_GET_THREAD_AREA           = 0x19
 	PTRACE_GET_THREAD_AREA_3264      = 0xc4
@@ -282,6 +297,8 @@ const (
 	RTC_WIE_ON                       = 0x2000700f
 	RTC_WKALM_RD                     = 0x40287010
 	RTC_WKALM_SET                    = 0x8028700f
+	SCM_DEVMEM_DMABUF                = 0x4f
+	SCM_DEVMEM_LINEAR                = 0x4e
 	SCM_TIMESTAMPING                 = 0x25
 	SCM_TIMESTAMPING_OPT_STATS       = 0x36
 	SCM_TIMESTAMPING_PKTINFO         = 0x3a
@@ -320,6 +337,9 @@ const (
 	SO_CNX_ADVICE                    = 0x35
 	SO_COOKIE                        = 0x39
 	SO_DETACH_REUSEPORT_BPF          = 0x44
+	SO_DEVMEM_DMABUF                 = 0x4f
+	SO_DEVMEM_DONTNEED               = 0x50
+	SO_DEVMEM_LINEAR                 = 0x4e
 	SO_DOMAIN                        = 0x1029
 	SO_DONTROUTE                     = 0x10
 	SO_ERROR                         = 0x1007
diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_mips64le.go b/vendor/golang.org/x/sys/unix/zerrors_linux_mips64le.go
index ea00e852..ced21d66 100644
--- a/vendor/golang.org/x/sys/unix/zerrors_linux_mips64le.go
+++ b/vendor/golang.org/x/sys/unix/zerrors_linux_mips64le.go
@@ -108,6 +108,7 @@ const (
 	HIDIOCGRAWINFO                   = 0x40084803
 	HIDIOCGRDESC                     = 0x50044802
 	HIDIOCGRDESCSIZE                 = 0x40044801
+	HIDIOCREVOKE                     = 0x8004480d
 	HUPCL                            = 0x400
 	ICANON                           = 0x2
 	IEXTEN                           = 0x100
@@ -234,6 +235,20 @@ const (
 	PPPIOCUNBRIDGECHAN               = 0x20007434
 	PPPIOCXFERUNIT                   = 0x2000744e
 	PR_SET_PTRACER_ANY               = 0xffffffffffffffff
+	PTP_CLOCK_GETCAPS                = 0x40503d01
+	PTP_CLOCK_GETCAPS2               = 0x40503d0a
+	PTP_ENABLE_PPS                   = 0x80043d04
+	PTP_ENABLE_PPS2                  = 0x80043d0d
+	PTP_EXTTS_REQUEST                = 0x80103d02
+	PTP_EXTTS_REQUEST2               = 0x80103d0b
+	PTP_MASK_CLEAR_ALL               = 0x20003d13
+	PTP_MASK_EN_SINGLE               = 0x80043d14
+	PTP_PEROUT_REQUEST               = 0x80383d03
+	PTP_PEROUT_REQUEST2              = 0x80383d0c
+	PTP_PIN_SETFUNC                  = 0x80603d07
+	PTP_PIN_SETFUNC2                 = 0x80603d10
+	PTP_SYS_OFFSET                   = 0x83403d05
+	PTP_SYS_OFFSET2                  = 0x83403d0e
 	PTRACE_GETFPREGS                 = 0xe
 	PTRACE_GET_THREAD_AREA           = 0x19
 	PTRACE_GET_THREAD_AREA_3264      = 0xc4
@@ -282,6 +297,8 @@ const (
 	RTC_WIE_ON                       = 0x2000700f
 	RTC_WKALM_RD                     = 0x40287010
 	RTC_WKALM_SET                    = 0x8028700f
+	SCM_DEVMEM_DMABUF                = 0x4f
+	SCM_DEVMEM_LINEAR                = 0x4e
 	SCM_TIMESTAMPING                 = 0x25
 	SCM_TIMESTAMPING_OPT_STATS       = 0x36
 	SCM_TIMESTAMPING_PKTINFO         = 0x3a
@@ -320,6 +337,9 @@ const (
 	SO_CNX_ADVICE                    = 0x35
 	SO_COOKIE                        = 0x39
 	SO_DETACH_REUSEPORT_BPF          = 0x44
+	SO_DEVMEM_DMABUF                 = 0x4f
+	SO_DEVMEM_DONTNEED               = 0x50
+	SO_DEVMEM_LINEAR                 = 0x4e
 	SO_DOMAIN                        = 0x1029
 	SO_DONTROUTE                     = 0x10
 	SO_ERROR                         = 0x1007
diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_mipsle.go b/vendor/golang.org/x/sys/unix/zerrors_linux_mipsle.go
index 91c64687..226c0441 100644
--- a/vendor/golang.org/x/sys/unix/zerrors_linux_mipsle.go
+++ b/vendor/golang.org/x/sys/unix/zerrors_linux_mipsle.go
@@ -108,6 +108,7 @@ const (
 	HIDIOCGRAWINFO                   = 0x40084803
 	HIDIOCGRDESC                     = 0x50044802
 	HIDIOCGRDESCSIZE                 = 0x40044801
+	HIDIOCREVOKE                     = 0x8004480d
 	HUPCL                            = 0x400
 	ICANON                           = 0x2
 	IEXTEN                           = 0x100
@@ -234,6 +235,20 @@ const (
 	PPPIOCUNBRIDGECHAN               = 0x20007434
 	PPPIOCXFERUNIT                   = 0x2000744e
 	PR_SET_PTRACER_ANY               = 0xffffffff
+	PTP_CLOCK_GETCAPS                = 0x40503d01
+	PTP_CLOCK_GETCAPS2               = 0x40503d0a
+	PTP_ENABLE_PPS                   = 0x80043d04
+	PTP_ENABLE_PPS2                  = 0x80043d0d
+	PTP_EXTTS_REQUEST                = 0x80103d02
+	PTP_EXTTS_REQUEST2               = 0x80103d0b
+	PTP_MASK_CLEAR_ALL               = 0x20003d13
+	PTP_MASK_EN_SINGLE               = 0x80043d14
+	PTP_PEROUT_REQUEST               = 0x80383d03
+	PTP_PEROUT_REQUEST2              = 0x80383d0c
+	PTP_PIN_SETFUNC                  = 0x80603d07
+	PTP_PIN_SETFUNC2                 = 0x80603d10
+	PTP_SYS_OFFSET                   = 0x83403d05
+	PTP_SYS_OFFSET2                  = 0x83403d0e
 	PTRACE_GETFPREGS                 = 0xe
 	PTRACE_GET_THREAD_AREA           = 0x19
 	PTRACE_GET_THREAD_AREA_3264      = 0xc4
@@ -282,6 +297,8 @@ const (
 	RTC_WIE_ON                       = 0x2000700f
 	RTC_WKALM_RD                     = 0x40287010
 	RTC_WKALM_SET                    = 0x8028700f
+	SCM_DEVMEM_DMABUF                = 0x4f
+	SCM_DEVMEM_LINEAR                = 0x4e
 	SCM_TIMESTAMPING                 = 0x25
 	SCM_TIMESTAMPING_OPT_STATS       = 0x36
 	SCM_TIMESTAMPING_PKTINFO         = 0x3a
@@ -320,6 +337,9 @@ const (
 	SO_CNX_ADVICE                    = 0x35
 	SO_COOKIE                        = 0x39
 	SO_DETACH_REUSEPORT_BPF          = 0x44
+	SO_DEVMEM_DMABUF                 = 0x4f
+	SO_DEVMEM_DONTNEED               = 0x50
+	SO_DEVMEM_LINEAR                 = 0x4e
 	SO_DOMAIN                        = 0x1029
 	SO_DONTROUTE                     = 0x10
 	SO_ERROR                         = 0x1007
diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_ppc.go b/vendor/golang.org/x/sys/unix/zerrors_linux_ppc.go
index 8cbf38d6..3122737c 100644
--- a/vendor/golang.org/x/sys/unix/zerrors_linux_ppc.go
+++ b/vendor/golang.org/x/sys/unix/zerrors_linux_ppc.go
@@ -108,6 +108,7 @@ const (
 	HIDIOCGRAWINFO                   = 0x40084803
 	HIDIOCGRDESC                     = 0x50044802
 	HIDIOCGRDESCSIZE                 = 0x40044801
+	HIDIOCREVOKE                     = 0x8004480d
 	HUPCL                            = 0x4000
 	ICANON                           = 0x100
 	IEXTEN                           = 0x400
@@ -237,6 +238,20 @@ const (
 	PPPIOCXFERUNIT                   = 0x2000744e
 	PROT_SAO                         = 0x10
 	PR_SET_PTRACER_ANY               = 0xffffffff
+	PTP_CLOCK_GETCAPS                = 0x40503d01
+	PTP_CLOCK_GETCAPS2               = 0x40503d0a
+	PTP_ENABLE_PPS                   = 0x80043d04
+	PTP_ENABLE_PPS2                  = 0x80043d0d
+	PTP_EXTTS_REQUEST                = 0x80103d02
+	PTP_EXTTS_REQUEST2               = 0x80103d0b
+	PTP_MASK_CLEAR_ALL               = 0x20003d13
+	PTP_MASK_EN_SINGLE               = 0x80043d14
+	PTP_PEROUT_REQUEST               = 0x80383d03
+	PTP_PEROUT_REQUEST2              = 0x80383d0c
+	PTP_PIN_SETFUNC                  = 0x80603d07
+	PTP_PIN_SETFUNC2                 = 0x80603d10
+	PTP_SYS_OFFSET                   = 0x83403d05
+	PTP_SYS_OFFSET2                  = 0x83403d0e
 	PTRACE_GETEVRREGS                = 0x14
 	PTRACE_GETFPREGS                 = 0xe
 	PTRACE_GETREGS64                 = 0x16
@@ -337,6 +352,8 @@ const (
 	RTC_WIE_ON                       = 0x2000700f
 	RTC_WKALM_RD                     = 0x40287010
 	RTC_WKALM_SET                    = 0x8028700f
+	SCM_DEVMEM_DMABUF                = 0x4f
+	SCM_DEVMEM_LINEAR                = 0x4e
 	SCM_TIMESTAMPING                 = 0x25
 	SCM_TIMESTAMPING_OPT_STATS       = 0x36
 	SCM_TIMESTAMPING_PKTINFO         = 0x3a
@@ -375,6 +392,9 @@ const (
 	SO_CNX_ADVICE                    = 0x35
 	SO_COOKIE                        = 0x39
 	SO_DETACH_REUSEPORT_BPF          = 0x44
+	SO_DEVMEM_DMABUF                 = 0x4f
+	SO_DEVMEM_DONTNEED               = 0x50
+	SO_DEVMEM_LINEAR                 = 0x4e
 	SO_DOMAIN                        = 0x27
 	SO_DONTROUTE                     = 0x5
 	SO_ERROR                         = 0x4
diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64.go b/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64.go
index a2df7341..eb5d3467 100644
--- a/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64.go
+++ b/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64.go
@@ -108,6 +108,7 @@ const (
 	HIDIOCGRAWINFO                   = 0x40084803
 	HIDIOCGRDESC                     = 0x50044802
 	HIDIOCGRDESCSIZE                 = 0x40044801
+	HIDIOCREVOKE                     = 0x8004480d
 	HUPCL                            = 0x4000
 	ICANON                           = 0x100
 	IEXTEN                           = 0x400
@@ -237,6 +238,20 @@ const (
 	PPPIOCXFERUNIT                   = 0x2000744e
 	PROT_SAO                         = 0x10
 	PR_SET_PTRACER_ANY               = 0xffffffffffffffff
+	PTP_CLOCK_GETCAPS                = 0x40503d01
+	PTP_CLOCK_GETCAPS2               = 0x40503d0a
+	PTP_ENABLE_PPS                   = 0x80043d04
+	PTP_ENABLE_PPS2                  = 0x80043d0d
+	PTP_EXTTS_REQUEST                = 0x80103d02
+	PTP_EXTTS_REQUEST2               = 0x80103d0b
+	PTP_MASK_CLEAR_ALL               = 0x20003d13
+	PTP_MASK_EN_SINGLE               = 0x80043d14
+	PTP_PEROUT_REQUEST               = 0x80383d03
+	PTP_PEROUT_REQUEST2              = 0x80383d0c
+	PTP_PIN_SETFUNC                  = 0x80603d07
+	PTP_PIN_SETFUNC2                 = 0x80603d10
+	PTP_SYS_OFFSET                   = 0x83403d05
+	PTP_SYS_OFFSET2                  = 0x83403d0e
 	PTRACE_GETEVRREGS                = 0x14
 	PTRACE_GETFPREGS                 = 0xe
 	PTRACE_GETREGS64                 = 0x16
@@ -341,6 +356,8 @@ const (
 	RTC_WIE_ON                       = 0x2000700f
 	RTC_WKALM_RD                     = 0x40287010
 	RTC_WKALM_SET                    = 0x8028700f
+	SCM_DEVMEM_DMABUF                = 0x4f
+	SCM_DEVMEM_LINEAR                = 0x4e
 	SCM_TIMESTAMPING                 = 0x25
 	SCM_TIMESTAMPING_OPT_STATS       = 0x36
 	SCM_TIMESTAMPING_PKTINFO         = 0x3a
@@ -379,6 +396,9 @@ const (
 	SO_CNX_ADVICE                    = 0x35
 	SO_COOKIE                        = 0x39
 	SO_DETACH_REUSEPORT_BPF          = 0x44
+	SO_DEVMEM_DMABUF                 = 0x4f
+	SO_DEVMEM_DONTNEED               = 0x50
+	SO_DEVMEM_LINEAR                 = 0x4e
 	SO_DOMAIN                        = 0x27
 	SO_DONTROUTE                     = 0x5
 	SO_ERROR                         = 0x4
diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64le.go b/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64le.go
index 24791379..e921ebc6 100644
--- a/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64le.go
+++ b/vendor/golang.org/x/sys/unix/zerrors_linux_ppc64le.go
@@ -108,6 +108,7 @@ const (
 	HIDIOCGRAWINFO                   = 0x40084803
 	HIDIOCGRDESC                     = 0x50044802
 	HIDIOCGRDESCSIZE                 = 0x40044801
+	HIDIOCREVOKE                     = 0x8004480d
 	HUPCL                            = 0x4000
 	ICANON                           = 0x100
 	IEXTEN                           = 0x400
@@ -237,6 +238,20 @@ const (
 	PPPIOCXFERUNIT                   = 0x2000744e
 	PROT_SAO                         = 0x10
 	PR_SET_PTRACER_ANY               = 0xffffffffffffffff
+	PTP_CLOCK_GETCAPS                = 0x40503d01
+	PTP_CLOCK_GETCAPS2               = 0x40503d0a
+	PTP_ENABLE_PPS                   = 0x80043d04
+	PTP_ENABLE_PPS2                  = 0x80043d0d
+	PTP_EXTTS_REQUEST                = 0x80103d02
+	PTP_EXTTS_REQUEST2               = 0x80103d0b
+	PTP_MASK_CLEAR_ALL               = 0x20003d13
+	PTP_MASK_EN_SINGLE               = 0x80043d14
+	PTP_PEROUT_REQUEST               = 0x80383d03
+	PTP_PEROUT_REQUEST2              = 0x80383d0c
+	PTP_PIN_SETFUNC                  = 0x80603d07
+	PTP_PIN_SETFUNC2                 = 0x80603d10
+	PTP_SYS_OFFSET                   = 0x83403d05
+	PTP_SYS_OFFSET2                  = 0x83403d0e
 	PTRACE_GETEVRREGS                = 0x14
 	PTRACE_GETFPREGS                 = 0xe
 	PTRACE_GETREGS64                 = 0x16
@@ -341,6 +356,8 @@ const (
 	RTC_WIE_ON                       = 0x2000700f
 	RTC_WKALM_RD                     = 0x40287010
 	RTC_WKALM_SET                    = 0x8028700f
+	SCM_DEVMEM_DMABUF                = 0x4f
+	SCM_DEVMEM_LINEAR                = 0x4e
 	SCM_TIMESTAMPING                 = 0x25
 	SCM_TIMESTAMPING_OPT_STATS       = 0x36
 	SCM_TIMESTAMPING_PKTINFO         = 0x3a
@@ -379,6 +396,9 @@ const (
 	SO_CNX_ADVICE                    = 0x35
 	SO_COOKIE                        = 0x39
 	SO_DETACH_REUSEPORT_BPF          = 0x44
+	SO_DEVMEM_DMABUF                 = 0x4f
+	SO_DEVMEM_DONTNEED               = 0x50
+	SO_DEVMEM_LINEAR                 = 0x4e
 	SO_DOMAIN                        = 0x27
 	SO_DONTROUTE                     = 0x5
 	SO_ERROR                         = 0x4
diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_riscv64.go b/vendor/golang.org/x/sys/unix/zerrors_linux_riscv64.go
index d265f146..38ba81c5 100644
--- a/vendor/golang.org/x/sys/unix/zerrors_linux_riscv64.go
+++ b/vendor/golang.org/x/sys/unix/zerrors_linux_riscv64.go
@@ -108,6 +108,7 @@ const (
 	HIDIOCGRAWINFO                   = 0x80084803
 	HIDIOCGRDESC                     = 0x90044802
 	HIDIOCGRDESCSIZE                 = 0x80044801
+	HIDIOCREVOKE                     = 0x4004480d
 	HUPCL                            = 0x400
 	ICANON                           = 0x2
 	IEXTEN                           = 0x8000
@@ -234,6 +235,20 @@ const (
 	PPPIOCUNBRIDGECHAN               = 0x7434
 	PPPIOCXFERUNIT                   = 0x744e
 	PR_SET_PTRACER_ANY               = 0xffffffffffffffff
+	PTP_CLOCK_GETCAPS                = 0x80503d01
+	PTP_CLOCK_GETCAPS2               = 0x80503d0a
+	PTP_ENABLE_PPS                   = 0x40043d04
+	PTP_ENABLE_PPS2                  = 0x40043d0d
+	PTP_EXTTS_REQUEST                = 0x40103d02
+	PTP_EXTTS_REQUEST2               = 0x40103d0b
+	PTP_MASK_CLEAR_ALL               = 0x3d13
+	PTP_MASK_EN_SINGLE               = 0x40043d14
+	PTP_PEROUT_REQUEST               = 0x40383d03
+	PTP_PEROUT_REQUEST2              = 0x40383d0c
+	PTP_PIN_SETFUNC                  = 0x40603d07
+	PTP_PIN_SETFUNC2                 = 0x40603d10
+	PTP_SYS_OFFSET                   = 0x43403d05
+	PTP_SYS_OFFSET2                  = 0x43403d0e
 	PTRACE_GETFDPIC                  = 0x21
 	PTRACE_GETFDPIC_EXEC             = 0x0
 	PTRACE_GETFDPIC_INTERP           = 0x1
@@ -273,6 +288,8 @@ const (
 	RTC_WIE_ON                       = 0x700f
 	RTC_WKALM_RD                     = 0x80287010
 	RTC_WKALM_SET                    = 0x4028700f
+	SCM_DEVMEM_DMABUF                = 0x4f
+	SCM_DEVMEM_LINEAR                = 0x4e
 	SCM_TIMESTAMPING                 = 0x25
 	SCM_TIMESTAMPING_OPT_STATS       = 0x36
 	SCM_TIMESTAMPING_PKTINFO         = 0x3a
@@ -311,6 +328,9 @@ const (
 	SO_CNX_ADVICE                    = 0x35
 	SO_COOKIE                        = 0x39
 	SO_DETACH_REUSEPORT_BPF          = 0x44
+	SO_DEVMEM_DMABUF                 = 0x4f
+	SO_DEVMEM_DONTNEED               = 0x50
+	SO_DEVMEM_LINEAR                 = 0x4e
 	SO_DOMAIN                        = 0x27
 	SO_DONTROUTE                     = 0x5
 	SO_ERROR                         = 0x4
diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_s390x.go b/vendor/golang.org/x/sys/unix/zerrors_linux_s390x.go
index 3f2d6443..71f04009 100644
--- a/vendor/golang.org/x/sys/unix/zerrors_linux_s390x.go
+++ b/vendor/golang.org/x/sys/unix/zerrors_linux_s390x.go
@@ -108,6 +108,7 @@ const (
 	HIDIOCGRAWINFO                   = 0x80084803
 	HIDIOCGRDESC                     = 0x90044802
 	HIDIOCGRDESCSIZE                 = 0x80044801
+	HIDIOCREVOKE                     = 0x4004480d
 	HUPCL                            = 0x400
 	ICANON                           = 0x2
 	IEXTEN                           = 0x8000
@@ -234,6 +235,20 @@ const (
 	PPPIOCUNBRIDGECHAN               = 0x7434
 	PPPIOCXFERUNIT                   = 0x744e
 	PR_SET_PTRACER_ANY               = 0xffffffffffffffff
+	PTP_CLOCK_GETCAPS                = 0x80503d01
+	PTP_CLOCK_GETCAPS2               = 0x80503d0a
+	PTP_ENABLE_PPS                   = 0x40043d04
+	PTP_ENABLE_PPS2                  = 0x40043d0d
+	PTP_EXTTS_REQUEST                = 0x40103d02
+	PTP_EXTTS_REQUEST2               = 0x40103d0b
+	PTP_MASK_CLEAR_ALL               = 0x3d13
+	PTP_MASK_EN_SINGLE               = 0x40043d14
+	PTP_PEROUT_REQUEST               = 0x40383d03
+	PTP_PEROUT_REQUEST2              = 0x40383d0c
+	PTP_PIN_SETFUNC                  = 0x40603d07
+	PTP_PIN_SETFUNC2                 = 0x40603d10
+	PTP_SYS_OFFSET                   = 0x43403d05
+	PTP_SYS_OFFSET2                  = 0x43403d0e
 	PTRACE_DISABLE_TE                = 0x5010
 	PTRACE_ENABLE_TE                 = 0x5009
 	PTRACE_GET_LAST_BREAK            = 0x5006
@@ -345,6 +360,8 @@ const (
 	RTC_WIE_ON                       = 0x700f
 	RTC_WKALM_RD                     = 0x80287010
 	RTC_WKALM_SET                    = 0x4028700f
+	SCM_DEVMEM_DMABUF                = 0x4f
+	SCM_DEVMEM_LINEAR                = 0x4e
 	SCM_TIMESTAMPING                 = 0x25
 	SCM_TIMESTAMPING_OPT_STATS       = 0x36
 	SCM_TIMESTAMPING_PKTINFO         = 0x3a
@@ -383,6 +400,9 @@ const (
 	SO_CNX_ADVICE                    = 0x35
 	SO_COOKIE                        = 0x39
 	SO_DETACH_REUSEPORT_BPF          = 0x44
+	SO_DEVMEM_DMABUF                 = 0x4f
+	SO_DEVMEM_DONTNEED               = 0x50
+	SO_DEVMEM_LINEAR                 = 0x4e
 	SO_DOMAIN                        = 0x27
 	SO_DONTROUTE                     = 0x5
 	SO_ERROR                         = 0x4
diff --git a/vendor/golang.org/x/sys/unix/zerrors_linux_sparc64.go b/vendor/golang.org/x/sys/unix/zerrors_linux_sparc64.go
index 5d8b727a..c44a3133 100644
--- a/vendor/golang.org/x/sys/unix/zerrors_linux_sparc64.go
+++ b/vendor/golang.org/x/sys/unix/zerrors_linux_sparc64.go
@@ -112,6 +112,7 @@ const (
 	HIDIOCGRAWINFO                   = 0x40084803
 	HIDIOCGRDESC                     = 0x50044802
 	HIDIOCGRDESCSIZE                 = 0x40044801
+	HIDIOCREVOKE                     = 0x8004480d
 	HUPCL                            = 0x400
 	ICANON                           = 0x2
 	IEXTEN                           = 0x8000
@@ -239,6 +240,20 @@ const (
 	PPPIOCUNBRIDGECHAN               = 0x20007434
 	PPPIOCXFERUNIT                   = 0x2000744e
 	PR_SET_PTRACER_ANY               = 0xffffffffffffffff
+	PTP_CLOCK_GETCAPS                = 0x40503d01
+	PTP_CLOCK_GETCAPS2               = 0x40503d0a
+	PTP_ENABLE_PPS                   = 0x80043d04
+	PTP_ENABLE_PPS2                  = 0x80043d0d
+	PTP_EXTTS_REQUEST                = 0x80103d02
+	PTP_EXTTS_REQUEST2               = 0x80103d0b
+	PTP_MASK_CLEAR_ALL               = 0x20003d13
+	PTP_MASK_EN_SINGLE               = 0x80043d14
+	PTP_PEROUT_REQUEST               = 0x80383d03
+	PTP_PEROUT_REQUEST2              = 0x80383d0c
+	PTP_PIN_SETFUNC                  = 0x80603d07
+	PTP_PIN_SETFUNC2                 = 0x80603d10
+	PTP_SYS_OFFSET                   = 0x83403d05
+	PTP_SYS_OFFSET2                  = 0x83403d0e
 	PTRACE_GETFPAREGS                = 0x14
 	PTRACE_GETFPREGS                 = 0xe
 	PTRACE_GETFPREGS64               = 0x19
@@ -336,6 +351,8 @@ const (
 	RTC_WIE_ON                       = 0x2000700f
 	RTC_WKALM_RD                     = 0x40287010
 	RTC_WKALM_SET                    = 0x8028700f
+	SCM_DEVMEM_DMABUF                = 0x58
+	SCM_DEVMEM_LINEAR                = 0x57
 	SCM_TIMESTAMPING                 = 0x23
 	SCM_TIMESTAMPING_OPT_STATS       = 0x38
 	SCM_TIMESTAMPING_PKTINFO         = 0x3c
@@ -422,6 +439,9 @@ const (
 	SO_CNX_ADVICE                    = 0x37
 	SO_COOKIE                        = 0x3b
 	SO_DETACH_REUSEPORT_BPF          = 0x47
+	SO_DEVMEM_DMABUF                 = 0x58
+	SO_DEVMEM_DONTNEED               = 0x59
+	SO_DEVMEM_LINEAR                 = 0x57
 	SO_DOMAIN                        = 0x1029
 	SO_DONTROUTE                     = 0x10
 	SO_ERROR                         = 0x1007
diff --git a/vendor/golang.org/x/sys/unix/zsyscall_linux.go b/vendor/golang.org/x/sys/unix/zsyscall_linux.go
index af30da55..5cc1e8eb 100644
--- a/vendor/golang.org/x/sys/unix/zsyscall_linux.go
+++ b/vendor/golang.org/x/sys/unix/zsyscall_linux.go
@@ -592,6 +592,16 @@ func ClockGettime(clockid int32, time *Timespec) (err error) {
 
 // THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
 
+func ClockSettime(clockid int32, time *Timespec) (err error) {
+	_, _, e1 := Syscall(SYS_CLOCK_SETTIME, uintptr(clockid), uintptr(unsafe.Pointer(time)), 0)
+	if e1 != 0 {
+		err = errnoErr(e1)
+	}
+	return
+}
+
+// THIS FILE IS GENERATED BY THE COMMAND AT THE TOP; DO NOT EDIT
+
 func ClockNanosleep(clockid int32, flags int, request *Timespec, remain *Timespec) (err error) {
 	_, _, e1 := Syscall6(SYS_CLOCK_NANOSLEEP, uintptr(clockid), uintptr(flags), uintptr(unsafe.Pointer(request)), uintptr(unsafe.Pointer(remain)), 0, 0)
 	if e1 != 0 {
diff --git a/vendor/golang.org/x/sys/unix/ztypes_darwin_amd64.go b/vendor/golang.org/x/sys/unix/ztypes_darwin_amd64.go
index d003c3d4..17c53bd9 100644
--- a/vendor/golang.org/x/sys/unix/ztypes_darwin_amd64.go
+++ b/vendor/golang.org/x/sys/unix/ztypes_darwin_amd64.go
@@ -462,11 +462,14 @@ type FdSet struct {
 
 const (
 	SizeofIfMsghdr    = 0x70
+	SizeofIfMsghdr2   = 0xa0
 	SizeofIfData      = 0x60
+	SizeofIfData64    = 0x80
 	SizeofIfaMsghdr   = 0x14
 	SizeofIfmaMsghdr  = 0x10
 	SizeofIfmaMsghdr2 = 0x14
 	SizeofRtMsghdr    = 0x5c
+	SizeofRtMsghdr2   = 0x5c
 	SizeofRtMetrics   = 0x38
 )
 
@@ -480,6 +483,20 @@ type IfMsghdr struct {
 	Data    IfData
 }
 
+type IfMsghdr2 struct {
+	Msglen     uint16
+	Version    uint8
+	Type       uint8
+	Addrs      int32
+	Flags      int32
+	Index      uint16
+	Snd_len    int32
+	Snd_maxlen int32
+	Snd_drops  int32
+	Timer      int32
+	Data       IfData64
+}
+
 type IfData struct {
 	Type       uint8
 	Typelen    uint8
@@ -512,6 +529,34 @@ type IfData struct {
 	Reserved2  uint32
 }
 
+type IfData64 struct {
+	Type       uint8
+	Typelen    uint8
+	Physical   uint8
+	Addrlen    uint8
+	Hdrlen     uint8
+	Recvquota  uint8
+	Xmitquota  uint8
+	Unused1    uint8
+	Mtu        uint32
+	Metric     uint32
+	Baudrate   uint64
+	Ipackets   uint64
+	Ierrors    uint64
+	Opackets   uint64
+	Oerrors    uint64
+	Collisions uint64
+	Ibytes     uint64
+	Obytes     uint64
+	Imcasts    uint64
+	Omcasts    uint64
+	Iqdrops    uint64
+	Noproto    uint64
+	Recvtiming uint32
+	Xmittiming uint32
+	Lastchange Timeval32
+}
+
 type IfaMsghdr struct {
 	Msglen  uint16
 	Version uint8
@@ -557,6 +602,21 @@ type RtMsghdr struct {
 	Rmx     RtMetrics
 }
 
+type RtMsghdr2 struct {
+	Msglen      uint16
+	Version     uint8
+	Type        uint8
+	Index       uint16
+	Flags       int32
+	Addrs       int32
+	Refcnt      int32
+	Parentflags int32
+	Reserved    int32
+	Use         int32
+	Inits       uint32
+	Rmx         RtMetrics
+}
+
 type RtMetrics struct {
 	Locks    uint32
 	Mtu      uint32
diff --git a/vendor/golang.org/x/sys/unix/ztypes_darwin_arm64.go b/vendor/golang.org/x/sys/unix/ztypes_darwin_arm64.go
index 0d45a941..2392226a 100644
--- a/vendor/golang.org/x/sys/unix/ztypes_darwin_arm64.go
+++ b/vendor/golang.org/x/sys/unix/ztypes_darwin_arm64.go
@@ -462,11 +462,14 @@ type FdSet struct {
 
 const (
 	SizeofIfMsghdr    = 0x70
+	SizeofIfMsghdr2   = 0xa0
 	SizeofIfData      = 0x60
+	SizeofIfData64    = 0x80
 	SizeofIfaMsghdr   = 0x14
 	SizeofIfmaMsghdr  = 0x10
 	SizeofIfmaMsghdr2 = 0x14
 	SizeofRtMsghdr    = 0x5c
+	SizeofRtMsghdr2   = 0x5c
 	SizeofRtMetrics   = 0x38
 )
 
@@ -480,6 +483,20 @@ type IfMsghdr struct {
 	Data    IfData
 }
 
+type IfMsghdr2 struct {
+	Msglen     uint16
+	Version    uint8
+	Type       uint8
+	Addrs      int32
+	Flags      int32
+	Index      uint16
+	Snd_len    int32
+	Snd_maxlen int32
+	Snd_drops  int32
+	Timer      int32
+	Data       IfData64
+}
+
 type IfData struct {
 	Type       uint8
 	Typelen    uint8
@@ -512,6 +529,34 @@ type IfData struct {
 	Reserved2  uint32
 }
 
+type IfData64 struct {
+	Type       uint8
+	Typelen    uint8
+	Physical   uint8
+	Addrlen    uint8
+	Hdrlen     uint8
+	Recvquota  uint8
+	Xmitquota  uint8
+	Unused1    uint8
+	Mtu        uint32
+	Metric     uint32
+	Baudrate   uint64
+	Ipackets   uint64
+	Ierrors    uint64
+	Opackets   uint64
+	Oerrors    uint64
+	Collisions uint64
+	Ibytes     uint64
+	Obytes     uint64
+	Imcasts    uint64
+	Omcasts    uint64
+	Iqdrops    uint64
+	Noproto    uint64
+	Recvtiming uint32
+	Xmittiming uint32
+	Lastchange Timeval32
+}
+
 type IfaMsghdr struct {
 	Msglen  uint16
 	Version uint8
@@ -557,6 +602,21 @@ type RtMsghdr struct {
 	Rmx     RtMetrics
 }
 
+type RtMsghdr2 struct {
+	Msglen      uint16
+	Version     uint8
+	Type        uint8
+	Index       uint16
+	Flags       int32
+	Addrs       int32
+	Refcnt      int32
+	Parentflags int32
+	Reserved    int32
+	Use         int32
+	Inits       uint32
+	Rmx         RtMetrics
+}
+
 type RtMetrics struct {
 	Locks    uint32
 	Mtu      uint32
diff --git a/vendor/golang.org/x/sys/unix/ztypes_linux.go b/vendor/golang.org/x/sys/unix/ztypes_linux.go
index 3a69e454..5537148d 100644
--- a/vendor/golang.org/x/sys/unix/ztypes_linux.go
+++ b/vendor/golang.org/x/sys/unix/ztypes_linux.go
@@ -1752,12 +1752,6 @@ const (
 	IFLA_IPVLAN_UNSPEC                         = 0x0
 	IFLA_IPVLAN_MODE                           = 0x1
 	IFLA_IPVLAN_FLAGS                          = 0x2
-	NETKIT_NEXT                                = -0x1
-	NETKIT_PASS                                = 0x0
-	NETKIT_DROP                                = 0x2
-	NETKIT_REDIRECT                            = 0x7
-	NETKIT_L2                                  = 0x0
-	NETKIT_L3                                  = 0x1
 	IFLA_NETKIT_UNSPEC                         = 0x0
 	IFLA_NETKIT_PEER_INFO                      = 0x1
 	IFLA_NETKIT_PRIMARY                        = 0x2
@@ -1796,6 +1790,7 @@ const (
 	IFLA_VXLAN_DF                              = 0x1d
 	IFLA_VXLAN_VNIFILTER                       = 0x1e
 	IFLA_VXLAN_LOCALBYPASS                     = 0x1f
+	IFLA_VXLAN_LABEL_POLICY                    = 0x20
 	IFLA_GENEVE_UNSPEC                         = 0x0
 	IFLA_GENEVE_ID                             = 0x1
 	IFLA_GENEVE_REMOTE                         = 0x2
@@ -1825,6 +1820,8 @@ const (
 	IFLA_GTP_ROLE                              = 0x4
 	IFLA_GTP_CREATE_SOCKETS                    = 0x5
 	IFLA_GTP_RESTART_COUNT                     = 0x6
+	IFLA_GTP_LOCAL                             = 0x7
+	IFLA_GTP_LOCAL6                            = 0x8
 	IFLA_BOND_UNSPEC                           = 0x0
 	IFLA_BOND_MODE                             = 0x1
 	IFLA_BOND_ACTIVE_SLAVE                     = 0x2
@@ -1857,6 +1854,7 @@ const (
 	IFLA_BOND_AD_LACP_ACTIVE                   = 0x1d
 	IFLA_BOND_MISSED_MAX                       = 0x1e
 	IFLA_BOND_NS_IP6_TARGET                    = 0x1f
+	IFLA_BOND_COUPLED_CONTROL                  = 0x20
 	IFLA_BOND_AD_INFO_UNSPEC                   = 0x0
 	IFLA_BOND_AD_INFO_AGGREGATOR               = 0x1
 	IFLA_BOND_AD_INFO_NUM_PORTS                = 0x2
@@ -1925,6 +1923,7 @@ const (
 	IFLA_HSR_SEQ_NR                            = 0x5
 	IFLA_HSR_VERSION                           = 0x6
 	IFLA_HSR_PROTOCOL                          = 0x7
+	IFLA_HSR_INTERLINK                         = 0x8
 	IFLA_STATS_UNSPEC                          = 0x0
 	IFLA_STATS_LINK_64                         = 0x1
 	IFLA_STATS_LINK_XSTATS                     = 0x2
@@ -1977,6 +1976,15 @@ const (
 	IFLA_DSA_MASTER                            = 0x1
 )
 
+const (
+	NETKIT_NEXT     = -0x1
+	NETKIT_PASS     = 0x0
+	NETKIT_DROP     = 0x2
+	NETKIT_REDIRECT = 0x7
+	NETKIT_L2       = 0x0
+	NETKIT_L3       = 0x1
+)
+
 const (
 	NF_INET_PRE_ROUTING  = 0x0
 	NF_INET_LOCAL_IN     = 0x1
@@ -2586,8 +2594,8 @@ const (
 	SOF_TIMESTAMPING_BIND_PHC     = 0x8000
 	SOF_TIMESTAMPING_OPT_ID_TCP   = 0x10000
 
-	SOF_TIMESTAMPING_LAST = 0x10000
-	SOF_TIMESTAMPING_MASK = 0x1ffff
+	SOF_TIMESTAMPING_LAST = 0x20000
+	SOF_TIMESTAMPING_MASK = 0x3ffff
 
 	SCM_TSTAMP_SND   = 0x0
 	SCM_TSTAMP_SCHED = 0x1
@@ -3533,7 +3541,7 @@ type Nhmsg struct {
 type NexthopGrp struct {
 	Id     uint32
 	Weight uint8
-	Resvd1 uint8
+	High   uint8
 	Resvd2 uint16
 }
 
@@ -3794,7 +3802,7 @@ const (
 	ETHTOOL_MSG_PSE_GET                       = 0x24
 	ETHTOOL_MSG_PSE_SET                       = 0x25
 	ETHTOOL_MSG_RSS_GET                       = 0x26
-	ETHTOOL_MSG_USER_MAX                      = 0x2c
+	ETHTOOL_MSG_USER_MAX                      = 0x2d
 	ETHTOOL_MSG_KERNEL_NONE                   = 0x0
 	ETHTOOL_MSG_STRSET_GET_REPLY              = 0x1
 	ETHTOOL_MSG_LINKINFO_GET_REPLY            = 0x2
@@ -3834,7 +3842,7 @@ const (
 	ETHTOOL_MSG_MODULE_NTF                    = 0x24
 	ETHTOOL_MSG_PSE_GET_REPLY                 = 0x25
 	ETHTOOL_MSG_RSS_GET_REPLY                 = 0x26
-	ETHTOOL_MSG_KERNEL_MAX                    = 0x2c
+	ETHTOOL_MSG_KERNEL_MAX                    = 0x2e
 	ETHTOOL_FLAG_COMPACT_BITSETS              = 0x1
 	ETHTOOL_FLAG_OMIT_REPLY                   = 0x2
 	ETHTOOL_FLAG_STATS                        = 0x4
@@ -3842,7 +3850,7 @@ const (
 	ETHTOOL_A_HEADER_DEV_INDEX                = 0x1
 	ETHTOOL_A_HEADER_DEV_NAME                 = 0x2
 	ETHTOOL_A_HEADER_FLAGS                    = 0x3
-	ETHTOOL_A_HEADER_MAX                      = 0x3
+	ETHTOOL_A_HEADER_MAX                      = 0x4
 	ETHTOOL_A_BITSET_BIT_UNSPEC               = 0x0
 	ETHTOOL_A_BITSET_BIT_INDEX                = 0x1
 	ETHTOOL_A_BITSET_BIT_NAME                 = 0x2
@@ -4023,11 +4031,11 @@ const (
 	ETHTOOL_A_CABLE_RESULT_UNSPEC             = 0x0
 	ETHTOOL_A_CABLE_RESULT_PAIR               = 0x1
 	ETHTOOL_A_CABLE_RESULT_CODE               = 0x2
-	ETHTOOL_A_CABLE_RESULT_MAX                = 0x2
+	ETHTOOL_A_CABLE_RESULT_MAX                = 0x3
 	ETHTOOL_A_CABLE_FAULT_LENGTH_UNSPEC       = 0x0
 	ETHTOOL_A_CABLE_FAULT_LENGTH_PAIR         = 0x1
 	ETHTOOL_A_CABLE_FAULT_LENGTH_CM           = 0x2
-	ETHTOOL_A_CABLE_FAULT_LENGTH_MAX          = 0x2
+	ETHTOOL_A_CABLE_FAULT_LENGTH_MAX          = 0x3
 	ETHTOOL_A_CABLE_TEST_NTF_STATUS_UNSPEC    = 0x0
 	ETHTOOL_A_CABLE_TEST_NTF_STATUS_STARTED   = 0x1
 	ETHTOOL_A_CABLE_TEST_NTF_STATUS_COMPLETED = 0x2
@@ -4110,6 +4118,107 @@ type EthtoolDrvinfo struct {
 	Regdump_len  uint32
 }
 
+type EthtoolTsInfo struct {
+	Cmd             uint32
+	So_timestamping uint32
+	Phc_index       int32
+	Tx_types        uint32
+	Tx_reserved     [3]uint32
+	Rx_filters      uint32
+	Rx_reserved     [3]uint32
+}
+
+type HwTstampConfig struct {
+	Flags     int32
+	Tx_type   int32
+	Rx_filter int32
+}
+
+const (
+	HWTSTAMP_FILTER_NONE            = 0x0
+	HWTSTAMP_FILTER_ALL             = 0x1
+	HWTSTAMP_FILTER_SOME            = 0x2
+	HWTSTAMP_FILTER_PTP_V1_L4_EVENT = 0x3
+	HWTSTAMP_FILTER_PTP_V2_L4_EVENT = 0x6
+	HWTSTAMP_FILTER_PTP_V2_L2_EVENT = 0x9
+	HWTSTAMP_FILTER_PTP_V2_EVENT    = 0xc
+)
+
+const (
+	HWTSTAMP_TX_OFF          = 0x0
+	HWTSTAMP_TX_ON           = 0x1
+	HWTSTAMP_TX_ONESTEP_SYNC = 0x2
+)
+
+type (
+	PtpClockCaps struct {
+		Max_adj            int32
+		N_alarm            int32
+		N_ext_ts           int32
+		N_per_out          int32
+		Pps                int32
+		N_pins             int32
+		Cross_timestamping int32
+		Adjust_phase       int32
+		Max_phase_adj      int32
+		Rsv                [11]int32
+	}
+	PtpClockTime struct {
+		Sec      int64
+		Nsec     uint32
+		Reserved uint32
+	}
+	PtpExttsEvent struct {
+		T     PtpClockTime
+		Index uint32
+		Flags uint32
+		Rsv   [2]uint32
+	}
+	PtpExttsRequest struct {
+		Index uint32
+		Flags uint32
+		Rsv   [2]uint32
+	}
+	PtpPeroutRequest struct {
+		StartOrPhase PtpClockTime
+		Period       PtpClockTime
+		Index        uint32
+		Flags        uint32
+		On           PtpClockTime
+	}
+	PtpPinDesc struct {
+		Name  [64]byte
+		Index uint32
+		Func  uint32
+		Chan  uint32
+		Rsv   [5]uint32
+	}
+	PtpSysOffset struct {
+		Samples uint32
+		Rsv     [3]uint32
+		Ts      [51]PtpClockTime
+	}
+	PtpSysOffsetExtended struct {
+		Samples uint32
+		Clockid int32
+		Rsv     [2]uint32
+		Ts      [25][3]PtpClockTime
+	}
+	PtpSysOffsetPrecise struct {
+		Device   PtpClockTime
+		Realtime PtpClockTime
+		Monoraw  PtpClockTime
+		Rsv      [4]uint32
+	}
+)
+
+const (
+	PTP_PF_NONE    = 0x0
+	PTP_PF_EXTTS   = 0x1
+	PTP_PF_PEROUT  = 0x2
+	PTP_PF_PHYSYNC = 0x3
+)
+
 type (
 	HIDRawReportDescriptor struct {
 		Size  uint32
@@ -4291,6 +4400,7 @@ const (
 type LandlockRulesetAttr struct {
 	Access_fs  uint64
 	Access_net uint64
+	Scoped     uint64
 }
 
 type LandlockPathBeneathAttr struct {
diff --git a/vendor/golang.org/x/sys/unix/ztypes_zos_s390x.go b/vendor/golang.org/x/sys/unix/ztypes_zos_s390x.go
index d9a13af4..2e5d5a44 100644
--- a/vendor/golang.org/x/sys/unix/ztypes_zos_s390x.go
+++ b/vendor/golang.org/x/sys/unix/ztypes_zos_s390x.go
@@ -377,6 +377,12 @@ type Flock_t struct {
 	Pid    int32
 }
 
+type F_cnvrt struct {
+	Cvtcmd int32
+	Pccsid int16
+	Fccsid int16
+}
+
 type Termios struct {
 	Cflag uint32
 	Iflag uint32
diff --git a/vendor/golang.org/x/sys/windows/syscall_windows.go b/vendor/golang.org/x/sys/windows/syscall_windows.go
index 5cee9a31..4a325438 100644
--- a/vendor/golang.org/x/sys/windows/syscall_windows.go
+++ b/vendor/golang.org/x/sys/windows/syscall_windows.go
@@ -168,6 +168,8 @@ func NewCallbackCDecl(fn interface{}) uintptr {
 //sys	CreateNamedPipe(name *uint16, flags uint32, pipeMode uint32, maxInstances uint32, outSize uint32, inSize uint32, defaultTimeout uint32, sa *SecurityAttributes) (handle Handle, err error)  [failretval==InvalidHandle] = CreateNamedPipeW
 //sys	ConnectNamedPipe(pipe Handle, overlapped *Overlapped) (err error)
 //sys	DisconnectNamedPipe(pipe Handle) (err error)
+//sys   GetNamedPipeClientProcessId(pipe Handle, clientProcessID *uint32) (err error)
+//sys   GetNamedPipeServerProcessId(pipe Handle, serverProcessID *uint32) (err error)
 //sys	GetNamedPipeInfo(pipe Handle, flags *uint32, outSize *uint32, inSize *uint32, maxInstances *uint32) (err error)
 //sys	GetNamedPipeHandleState(pipe Handle, state *uint32, curInstances *uint32, maxCollectionCount *uint32, collectDataTimeout *uint32, userName *uint16, maxUserNameSize uint32) (err error) = GetNamedPipeHandleStateW
 //sys	SetNamedPipeHandleState(pipe Handle, state *uint32, maxCollectionCount *uint32, collectDataTimeout *uint32) (err error) = SetNamedPipeHandleState
@@ -725,20 +727,12 @@ func DurationSinceBoot() time.Duration {
 }
 
 func Ftruncate(fd Handle, length int64) (err error) {
-	curoffset, e := Seek(fd, 0, 1)
-	if e != nil {
-		return e
-	}
-	defer Seek(fd, curoffset, 0)
-	_, e = Seek(fd, length, 0)
-	if e != nil {
-		return e
+	type _FILE_END_OF_FILE_INFO struct {
+		EndOfFile int64
 	}
-	e = SetEndOfFile(fd)
-	if e != nil {
-		return e
-	}
-	return nil
+	var info _FILE_END_OF_FILE_INFO
+	info.EndOfFile = length
+	return SetFileInformationByHandle(fd, FileEndOfFileInfo, (*byte)(unsafe.Pointer(&info)), uint32(unsafe.Sizeof(info)))
 }
 
 func Gettimeofday(tv *Timeval) (err error) {
@@ -894,6 +888,11 @@ const socket_error = uintptr(^uint32(0))
 //sys	GetACP() (acp uint32) = kernel32.GetACP
 //sys	MultiByteToWideChar(codePage uint32, dwFlags uint32, str *byte, nstr int32, wchar *uint16, nwchar int32) (nwrite int32, err error) = kernel32.MultiByteToWideChar
 //sys	getBestInterfaceEx(sockaddr unsafe.Pointer, pdwBestIfIndex *uint32) (errcode error) = iphlpapi.GetBestInterfaceEx
+//sys   GetIfEntry2Ex(level uint32, row *MibIfRow2) (errcode error) = iphlpapi.GetIfEntry2Ex
+//sys   GetUnicastIpAddressEntry(row *MibUnicastIpAddressRow) (errcode error) = iphlpapi.GetUnicastIpAddressEntry
+//sys   NotifyIpInterfaceChange(family uint16, callback uintptr, callerContext unsafe.Pointer, initialNotification bool, notificationHandle *Handle) (errcode error) = iphlpapi.NotifyIpInterfaceChange
+//sys   NotifyUnicastIpAddressChange(family uint16, callback uintptr, callerContext unsafe.Pointer, initialNotification bool, notificationHandle *Handle) (errcode error) = iphlpapi.NotifyUnicastIpAddressChange
+//sys   CancelMibChangeNotify2(notificationHandle Handle) (errcode error) = iphlpapi.CancelMibChangeNotify2
 
 // For testing: clients can set this flag to force
 // creation of IPv6 sockets to return EAFNOSUPPORT.
@@ -1685,13 +1684,16 @@ func (s NTStatus) Error() string {
 // do not use NTUnicodeString, and instead UTF16PtrFromString should be used for
 // the more common *uint16 string type.
 func NewNTUnicodeString(s string) (*NTUnicodeString, error) {
-	var u NTUnicodeString
-	s16, err := UTF16PtrFromString(s)
+	s16, err := UTF16FromString(s)
 	if err != nil {
 		return nil, err
 	}
-	RtlInitUnicodeString(&u, s16)
-	return &u, nil
+	n := uint16(len(s16) * 2)
+	return &NTUnicodeString{
+		Length:        n - 2, // subtract 2 bytes for the NULL terminator
+		MaximumLength: n,
+		Buffer:        &s16[0],
+	}, nil
 }
 
 // Slice returns a uint16 slice that aliases the data in the NTUnicodeString.
diff --git a/vendor/golang.org/x/sys/windows/types_windows.go b/vendor/golang.org/x/sys/windows/types_windows.go
index 7b97a154..9d138de5 100644
--- a/vendor/golang.org/x/sys/windows/types_windows.go
+++ b/vendor/golang.org/x/sys/windows/types_windows.go
@@ -176,6 +176,7 @@ const (
 	WAIT_FAILED    = 0xFFFFFFFF
 
 	// Access rights for process.
+	PROCESS_ALL_ACCESS                = 0xFFFF
 	PROCESS_CREATE_PROCESS            = 0x0080
 	PROCESS_CREATE_THREAD             = 0x0002
 	PROCESS_DUP_HANDLE                = 0x0040
@@ -2203,6 +2204,132 @@ const (
 	IfOperStatusLowerLayerDown = 7
 )
 
+const (
+	IF_MAX_PHYS_ADDRESS_LENGTH = 32
+	IF_MAX_STRING_SIZE         = 256
+)
+
+// MIB_IF_ENTRY_LEVEL enumeration from netioapi.h or
+// https://learn.microsoft.com/en-us/windows/win32/api/netioapi/nf-netioapi-getifentry2ex.
+const (
+	MibIfEntryNormal                  = 0
+	MibIfEntryNormalWithoutStatistics = 2
+)
+
+// MIB_NOTIFICATION_TYPE enumeration from netioapi.h or
+// https://learn.microsoft.com/en-us/windows/win32/api/netioapi/ne-netioapi-mib_notification_type.
+const (
+	MibParameterNotification = 0
+	MibAddInstance           = 1
+	MibDeleteInstance        = 2
+	MibInitialNotification   = 3
+)
+
+// MibIfRow2 stores information about a particular interface. See
+// https://learn.microsoft.com/en-us/windows/win32/api/netioapi/ns-netioapi-mib_if_row2.
+type MibIfRow2 struct {
+	InterfaceLuid               uint64
+	InterfaceIndex              uint32
+	InterfaceGuid               GUID
+	Alias                       [IF_MAX_STRING_SIZE + 1]uint16
+	Description                 [IF_MAX_STRING_SIZE + 1]uint16
+	PhysicalAddressLength       uint32
+	PhysicalAddress             [IF_MAX_PHYS_ADDRESS_LENGTH]uint8
+	PermanentPhysicalAddress    [IF_MAX_PHYS_ADDRESS_LENGTH]uint8
+	Mtu                         uint32
+	Type                        uint32
+	TunnelType                  uint32
+	MediaType                   uint32
+	PhysicalMediumType          uint32
+	AccessType                  uint32
+	DirectionType               uint32
+	InterfaceAndOperStatusFlags uint8
+	OperStatus                  uint32
+	AdminStatus                 uint32
+	MediaConnectState           uint32
+	NetworkGuid                 GUID
+	ConnectionType              uint32
+	TransmitLinkSpeed           uint64
+	ReceiveLinkSpeed            uint64
+	InOctets                    uint64
+	InUcastPkts                 uint64
+	InNUcastPkts                uint64
+	InDiscards                  uint64
+	InErrors                    uint64
+	InUnknownProtos             uint64
+	InUcastOctets               uint64
+	InMulticastOctets           uint64
+	InBroadcastOctets           uint64
+	OutOctets                   uint64
+	OutUcastPkts                uint64
+	OutNUcastPkts               uint64
+	OutDiscards                 uint64
+	OutErrors                   uint64
+	OutUcastOctets              uint64
+	OutMulticastOctets          uint64
+	OutBroadcastOctets          uint64
+	OutQLen                     uint64
+}
+
+// MIB_UNICASTIPADDRESS_ROW stores information about a unicast IP address. See
+// https://learn.microsoft.com/en-us/windows/win32/api/netioapi/ns-netioapi-mib_unicastipaddress_row.
+type MibUnicastIpAddressRow struct {
+	Address            RawSockaddrInet6 // SOCKADDR_INET union
+	InterfaceLuid      uint64
+	InterfaceIndex     uint32
+	PrefixOrigin       uint32
+	SuffixOrigin       uint32
+	ValidLifetime      uint32
+	PreferredLifetime  uint32
+	OnLinkPrefixLength uint8
+	SkipAsSource       uint8
+	DadState           uint32
+	ScopeId            uint32
+	CreationTimeStamp  Filetime
+}
+
+const ScopeLevelCount = 16
+
+// MIB_IPINTERFACE_ROW stores interface management information for a particular IP address family on a network interface.
+// See https://learn.microsoft.com/en-us/windows/win32/api/netioapi/ns-netioapi-mib_ipinterface_row.
+type MibIpInterfaceRow struct {
+	Family                               uint16
+	InterfaceLuid                        uint64
+	InterfaceIndex                       uint32
+	MaxReassemblySize                    uint32
+	InterfaceIdentifier                  uint64
+	MinRouterAdvertisementInterval       uint32
+	MaxRouterAdvertisementInterval       uint32
+	AdvertisingEnabled                   uint8
+	ForwardingEnabled                    uint8
+	WeakHostSend                         uint8
+	WeakHostReceive                      uint8
+	UseAutomaticMetric                   uint8
+	UseNeighborUnreachabilityDetection   uint8
+	ManagedAddressConfigurationSupported uint8
+	OtherStatefulConfigurationSupported  uint8
+	AdvertiseDefaultRoute                uint8
+	RouterDiscoveryBehavior              uint32
+	DadTransmits                         uint32
+	BaseReachableTime                    uint32
+	RetransmitTime                       uint32
+	PathMtuDiscoveryTimeout              uint32
+	LinkLocalAddressBehavior             uint32
+	LinkLocalAddressTimeout              uint32
+	ZoneIndices                          [ScopeLevelCount]uint32
+	SitePrefixLength                     uint32
+	Metric                               uint32
+	NlMtu                                uint32
+	Connected                            uint8
+	SupportsWakeUpPatterns               uint8
+	SupportsNeighborDiscovery            uint8
+	SupportsRouterDiscovery              uint8
+	ReachableTime                        uint32
+	TransmitOffload                      uint32
+	ReceiveOffload                       uint32
+	DisableDefaultRoutes                 uint8
+}
+
 // Console related constants used for the mode parameter to SetConsoleMode. See
 // https://docs.microsoft.com/en-us/windows/console/setconsolemode for details.
 
diff --git a/vendor/golang.org/x/sys/windows/zsyscall_windows.go b/vendor/golang.org/x/sys/windows/zsyscall_windows.go
index 4c2e1bdc..01c0716c 100644
--- a/vendor/golang.org/x/sys/windows/zsyscall_windows.go
+++ b/vendor/golang.org/x/sys/windows/zsyscall_windows.go
@@ -181,10 +181,15 @@ var (
 	procDnsRecordListFree                                    = moddnsapi.NewProc("DnsRecordListFree")
 	procDwmGetWindowAttribute                                = moddwmapi.NewProc("DwmGetWindowAttribute")
 	procDwmSetWindowAttribute                                = moddwmapi.NewProc("DwmSetWindowAttribute")
+	procCancelMibChangeNotify2                               = modiphlpapi.NewProc("CancelMibChangeNotify2")
 	procGetAdaptersAddresses                                 = modiphlpapi.NewProc("GetAdaptersAddresses")
 	procGetAdaptersInfo                                      = modiphlpapi.NewProc("GetAdaptersInfo")
 	procGetBestInterfaceEx                                   = modiphlpapi.NewProc("GetBestInterfaceEx")
 	procGetIfEntry                                           = modiphlpapi.NewProc("GetIfEntry")
+	procGetIfEntry2Ex                                        = modiphlpapi.NewProc("GetIfEntry2Ex")
+	procGetUnicastIpAddressEntry                             = modiphlpapi.NewProc("GetUnicastIpAddressEntry")
+	procNotifyIpInterfaceChange                              = modiphlpapi.NewProc("NotifyIpInterfaceChange")
+	procNotifyUnicastIpAddressChange                         = modiphlpapi.NewProc("NotifyUnicastIpAddressChange")
 	procAddDllDirectory                                      = modkernel32.NewProc("AddDllDirectory")
 	procAssignProcessToJobObject                             = modkernel32.NewProc("AssignProcessToJobObject")
 	procCancelIo                                             = modkernel32.NewProc("CancelIo")
@@ -275,8 +280,10 @@ var (
 	procGetMaximumProcessorCount                             = modkernel32.NewProc("GetMaximumProcessorCount")
 	procGetModuleFileNameW                                   = modkernel32.NewProc("GetModuleFileNameW")
 	procGetModuleHandleExW                                   = modkernel32.NewProc("GetModuleHandleExW")
+	procGetNamedPipeClientProcessId                          = modkernel32.NewProc("GetNamedPipeClientProcessId")
 	procGetNamedPipeHandleStateW                             = modkernel32.NewProc("GetNamedPipeHandleStateW")
 	procGetNamedPipeInfo                                     = modkernel32.NewProc("GetNamedPipeInfo")
+	procGetNamedPipeServerProcessId                          = modkernel32.NewProc("GetNamedPipeServerProcessId")
 	procGetOverlappedResult                                  = modkernel32.NewProc("GetOverlappedResult")
 	procGetPriorityClass                                     = modkernel32.NewProc("GetPriorityClass")
 	procGetProcAddress                                       = modkernel32.NewProc("GetProcAddress")
@@ -1606,6 +1613,14 @@ func DwmSetWindowAttribute(hwnd HWND, attribute uint32, value unsafe.Pointer, si
 	return
 }
 
+func CancelMibChangeNotify2(notificationHandle Handle) (errcode error) {
+	r0, _, _ := syscall.Syscall(procCancelMibChangeNotify2.Addr(), 1, uintptr(notificationHandle), 0, 0)
+	if r0 != 0 {
+		errcode = syscall.Errno(r0)
+	}
+	return
+}
+
 func GetAdaptersAddresses(family uint32, flags uint32, reserved uintptr, adapterAddresses *IpAdapterAddresses, sizePointer *uint32) (errcode error) {
 	r0, _, _ := syscall.Syscall6(procGetAdaptersAddresses.Addr(), 5, uintptr(family), uintptr(flags), uintptr(reserved), uintptr(unsafe.Pointer(adapterAddresses)), uintptr(unsafe.Pointer(sizePointer)), 0)
 	if r0 != 0 {
@@ -1638,6 +1653,46 @@ func GetIfEntry(pIfRow *MibIfRow) (errcode error) {
 	return
 }
 
+func GetIfEntry2Ex(level uint32, row *MibIfRow2) (errcode error) {
+	r0, _, _ := syscall.Syscall(procGetIfEntry2Ex.Addr(), 2, uintptr(level), uintptr(unsafe.Pointer(row)), 0)
+	if r0 != 0 {
+		errcode = syscall.Errno(r0)
+	}
+	return
+}
+
+func GetUnicastIpAddressEntry(row *MibUnicastIpAddressRow) (errcode error) {
+	r0, _, _ := syscall.Syscall(procGetUnicastIpAddressEntry.Addr(), 1, uintptr(unsafe.Pointer(row)), 0, 0)
+	if r0 != 0 {
+		errcode = syscall.Errno(r0)
+	}
+	return
+}
+
+func NotifyIpInterfaceChange(family uint16, callback uintptr, callerContext unsafe.Pointer, initialNotification bool, notificationHandle *Handle) (errcode error) {
+	var _p0 uint32
+	if initialNotification {
+		_p0 = 1
+	}
+	r0, _, _ := syscall.Syscall6(procNotifyIpInterfaceChange.Addr(), 5, uintptr(family), uintptr(callback), uintptr(callerContext), uintptr(_p0), uintptr(unsafe.Pointer(notificationHandle)), 0)
+	if r0 != 0 {
+		errcode = syscall.Errno(r0)
+	}
+	return
+}
+
+func NotifyUnicastIpAddressChange(family uint16, callback uintptr, callerContext unsafe.Pointer, initialNotification bool, notificationHandle *Handle) (errcode error) {
+	var _p0 uint32
+	if initialNotification {
+		_p0 = 1
+	}
+	r0, _, _ := syscall.Syscall6(procNotifyUnicastIpAddressChange.Addr(), 5, uintptr(family), uintptr(callback), uintptr(callerContext), uintptr(_p0), uintptr(unsafe.Pointer(notificationHandle)), 0)
+	if r0 != 0 {
+		errcode = syscall.Errno(r0)
+	}
+	return
+}
+
 func AddDllDirectory(path *uint16) (cookie uintptr, err error) {
 	r0, _, e1 := syscall.Syscall(procAddDllDirectory.Addr(), 1, uintptr(unsafe.Pointer(path)), 0, 0)
 	cookie = uintptr(r0)
@@ -2393,6 +2448,14 @@ func GetModuleHandleEx(flags uint32, moduleName *uint16, module *Handle) (err er
 	return
 }
 
+func GetNamedPipeClientProcessId(pipe Handle, clientProcessID *uint32) (err error) {
+	r1, _, e1 := syscall.Syscall(procGetNamedPipeClientProcessId.Addr(), 2, uintptr(pipe), uintptr(unsafe.Pointer(clientProcessID)), 0)
+	if r1 == 0 {
+		err = errnoErr(e1)
+	}
+	return
+}
+
 func GetNamedPipeHandleState(pipe Handle, state *uint32, curInstances *uint32, maxCollectionCount *uint32, collectDataTimeout *uint32, userName *uint16, maxUserNameSize uint32) (err error) {
 	r1, _, e1 := syscall.Syscall9(procGetNamedPipeHandleStateW.Addr(), 7, uintptr(pipe), uintptr(unsafe.Pointer(state)), uintptr(unsafe.Pointer(curInstances)), uintptr(unsafe.Pointer(maxCollectionCount)), uintptr(unsafe.Pointer(collectDataTimeout)), uintptr(unsafe.Pointer(userName)), uintptr(maxUserNameSize), 0, 0)
 	if r1 == 0 {
@@ -2409,6 +2472,14 @@ func GetNamedPipeInfo(pipe Handle, flags *uint32, outSize *uint32, inSize *uint3
 	return
 }
 
+func GetNamedPipeServerProcessId(pipe Handle, serverProcessID *uint32) (err error) {
+	r1, _, e1 := syscall.Syscall(procGetNamedPipeServerProcessId.Addr(), 2, uintptr(pipe), uintptr(unsafe.Pointer(serverProcessID)), 0)
+	if r1 == 0 {
+		err = errnoErr(e1)
+	}
+	return
+}
+
 func GetOverlappedResult(handle Handle, overlapped *Overlapped, done *uint32, wait bool) (err error) {
 	var _p0 uint32
 	if wait {
diff --git a/vendor/golang.org/x/term/README.md b/vendor/golang.org/x/term/README.md
index d03d0aef..05ff623f 100644
--- a/vendor/golang.org/x/term/README.md
+++ b/vendor/golang.org/x/term/README.md
@@ -4,16 +4,13 @@
 
 This repository provides Go terminal and console support packages.
 
-## Download/Install
-
-The easiest way to install is to run `go get -u golang.org/x/term`. You can
-also manually git clone the repository to `$GOPATH/src/golang.org/x/term`.
-
 ## Report Issues / Send Patches
 
 This repository uses Gerrit for code changes. To learn how to submit changes to
-this repository, see https://golang.org/doc/contribute.html.
+this repository, see https://go.dev/doc/contribute.
+
+The git repository is https://go.googlesource.com/term.
 
 The main issue tracker for the term repository is located at
-https://github.com/golang/go/issues. Prefix your issue with "x/term:" in the
+https://go.dev/issues. Prefix your issue with "x/term:" in the
 subject line, so it is easy to find.
diff --git a/vendor/golang.org/x/term/term_windows.go b/vendor/golang.org/x/term/term_windows.go
index 465f5606..df6bf948 100644
--- a/vendor/golang.org/x/term/term_windows.go
+++ b/vendor/golang.org/x/term/term_windows.go
@@ -26,6 +26,7 @@ func makeRaw(fd int) (*State, error) {
 		return nil, err
 	}
 	raw := st &^ (windows.ENABLE_ECHO_INPUT | windows.ENABLE_PROCESSED_INPUT | windows.ENABLE_LINE_INPUT | windows.ENABLE_PROCESSED_OUTPUT)
+	raw |= windows.ENABLE_VIRTUAL_TERMINAL_INPUT
 	if err := windows.SetConsoleMode(windows.Handle(fd), raw); err != nil {
 		return nil, err
 	}
diff --git a/vendor/modules.txt b/vendor/modules.txt
index 90d97cb3..e4d850f0 100644
--- a/vendor/modules.txt
+++ b/vendor/modules.txt
@@ -105,6 +105,9 @@ github.com/fxamacker/cbor/v2
 # github.com/gavv/monotime v0.0.0-20190418164738-30dba4353424
 ## explicit
 github.com/gavv/monotime
+# github.com/go-ini/ini v1.67.0
+## explicit
+github.com/go-ini/ini
 # github.com/go-kit/kit v0.13.0
 ## explicit; go 1.17
 github.com/go-kit/kit/log
@@ -134,6 +137,17 @@ github.com/go-openapi/jsonreference/internal
 # github.com/go-openapi/swag v0.22.4
 ## explicit; go 1.18
 github.com/go-openapi/swag
+# github.com/goccy/go-json v0.10.3
+## explicit; go 1.19
+github.com/goccy/go-json
+github.com/goccy/go-json/internal/decoder
+github.com/goccy/go-json/internal/encoder
+github.com/goccy/go-json/internal/encoder/vm
+github.com/goccy/go-json/internal/encoder/vm_color
+github.com/goccy/go-json/internal/encoder/vm_color_indent
+github.com/goccy/go-json/internal/encoder/vm_indent
+github.com/goccy/go-json/internal/errors
+github.com/goccy/go-json/internal/runtime
 # github.com/gogo/protobuf v1.3.2
 ## explicit; go 1.15
 github.com/gogo/protobuf/gogoproto
@@ -179,7 +193,7 @@ github.com/gopacket/gopacket/layers
 # github.com/gorilla/websocket v1.5.0
 ## explicit; go 1.12
 github.com/gorilla/websocket
-# github.com/grpc-ecosystem/grpc-gateway/v2 v2.22.0
+# github.com/grpc-ecosystem/grpc-gateway/v2 v2.23.0
 ## explicit; go 1.21
 github.com/grpc-ecosystem/grpc-gateway/v2/internal/httprule
 github.com/grpc-ecosystem/grpc-gateway/v2/runtime
@@ -202,8 +216,8 @@ github.com/jpillora/backoff
 # github.com/json-iterator/go v1.1.12
 ## explicit; go 1.12
 github.com/json-iterator/go
-# github.com/klauspost/compress v1.17.9
-## explicit; go 1.20
+# github.com/klauspost/compress v1.17.11
+## explicit; go 1.21
 github.com/klauspost/compress
 github.com/klauspost/compress/flate
 github.com/klauspost/compress/fse
@@ -216,7 +230,7 @@ github.com/klauspost/compress/s2
 github.com/klauspost/compress/snappy
 github.com/klauspost/compress/zstd
 github.com/klauspost/compress/zstd/internal/xxhash
-# github.com/klauspost/cpuid/v2 v2.2.6
+# github.com/klauspost/cpuid/v2 v2.2.8
 ## explicit; go 1.15
 github.com/klauspost/cpuid/v2
 # github.com/libp2p/go-reuseport v0.3.0
@@ -236,9 +250,10 @@ github.com/mdlayher/ethernet
 # github.com/minio/md5-simd v1.1.2
 ## explicit; go 1.14
 github.com/minio/md5-simd
-# github.com/minio/minio-go/v7 v7.0.69
-## explicit; go 1.21
+# github.com/minio/minio-go/v7 v7.0.82
+## explicit; go 1.22
 github.com/minio/minio-go/v7
+github.com/minio/minio-go/v7/pkg/cors
 github.com/minio/minio-go/v7/pkg/credentials
 github.com/minio/minio-go/v7/pkg/encrypt
 github.com/minio/minio-go/v7/pkg/lifecycle
@@ -249,9 +264,6 @@ github.com/minio/minio-go/v7/pkg/set
 github.com/minio/minio-go/v7/pkg/signer
 github.com/minio/minio-go/v7/pkg/sse
 github.com/minio/minio-go/v7/pkg/tags
-# github.com/minio/sha256-simd v1.0.1
-## explicit; go 1.17
-github.com/minio/sha256-simd
 # github.com/mitchellh/mapstructure v1.5.0
 ## explicit; go 1.14
 github.com/mitchellh/mapstructure
@@ -274,7 +286,7 @@ github.com/mwitkow/go-conntrack
 # github.com/mxk/go-flowrate v0.0.0-20140419014527-cca7078d478f
 ## explicit
 github.com/mxk/go-flowrate/flowrate
-# github.com/netobserv/flowlogs-pipeline v1.6.1-crc0.0.20240930010330-bc3d4ebb4a88
+# github.com/netobserv/flowlogs-pipeline v1.7.0-community.0.20241217113023-fa0540a1658e
 ## explicit; go 1.22.3
 github.com/netobserv/flowlogs-pipeline/pkg/api
 github.com/netobserv/flowlogs-pipeline/pkg/config
@@ -282,6 +294,7 @@ github.com/netobserv/flowlogs-pipeline/pkg/operational
 github.com/netobserv/flowlogs-pipeline/pkg/pipeline
 github.com/netobserv/flowlogs-pipeline/pkg/pipeline/decode
 github.com/netobserv/flowlogs-pipeline/pkg/pipeline/encode
+github.com/netobserv/flowlogs-pipeline/pkg/pipeline/encode/metrics
 github.com/netobserv/flowlogs-pipeline/pkg/pipeline/encode/opentelemetry
 github.com/netobserv/flowlogs-pipeline/pkg/pipeline/extract
 github.com/netobserv/flowlogs-pipeline/pkg/pipeline/extract/aggregate
@@ -301,6 +314,7 @@ github.com/netobserv/flowlogs-pipeline/pkg/pipeline/write/grpc/genericmap
 github.com/netobserv/flowlogs-pipeline/pkg/prometheus
 github.com/netobserv/flowlogs-pipeline/pkg/server
 github.com/netobserv/flowlogs-pipeline/pkg/utils
+github.com/netobserv/flowlogs-pipeline/pkg/utils/filters
 # github.com/netobserv/gopipes v0.3.0
 ## explicit; go 1.18
 github.com/netobserv/gopipes/pkg/node
@@ -436,8 +450,8 @@ github.com/prometheus/prometheus/tsdb/errors
 github.com/prometheus/prometheus/tsdb/fileutil
 github.com/prometheus/prometheus/tsdb/tsdbutil
 github.com/prometheus/prometheus/util/strutil
-# github.com/rs/xid v1.5.0
-## explicit; go 1.12
+# github.com/rs/xid v1.6.0
+## explicit; go 1.16
 github.com/rs/xid
 # github.com/russross/blackfriday/v2 v2.1.0
 ## explicit
@@ -554,8 +568,8 @@ github.com/xdg-go/stringprep
 # github.com/xrash/smetrics v0.0.0-20240312152122-5f08fbb34913
 ## explicit; go 1.15.0
 github.com/xrash/smetrics
-# go.opentelemetry.io/otel v1.29.0
-## explicit; go 1.21
+# go.opentelemetry.io/otel v1.32.0
+## explicit; go 1.22
 go.opentelemetry.io/otel
 go.opentelemetry.io/otel/attribute
 go.opentelemetry.io/otel/baggage
@@ -567,8 +581,8 @@ go.opentelemetry.io/otel/internal/global
 go.opentelemetry.io/otel/propagation
 go.opentelemetry.io/otel/semconv/v1.21.0
 go.opentelemetry.io/otel/semconv/v1.26.0
-# go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.29.0
-## explicit; go 1.21
+# go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.32.0
+## explicit; go 1.22
 go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc
 go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc/internal
 go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc/internal/envconfig
@@ -601,29 +615,29 @@ go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp/internal
 go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp/internal/envconfig
 go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp/internal/otlpconfig
 go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp/internal/retry
-# go.opentelemetry.io/otel/metric v1.29.0
-## explicit; go 1.21
+# go.opentelemetry.io/otel/metric v1.32.0
+## explicit; go 1.22
 go.opentelemetry.io/otel/metric
 go.opentelemetry.io/otel/metric/embedded
 go.opentelemetry.io/otel/metric/noop
-# go.opentelemetry.io/otel/sdk v1.29.0
-## explicit; go 1.21
+# go.opentelemetry.io/otel/sdk v1.32.0
+## explicit; go 1.22
 go.opentelemetry.io/otel/sdk
 go.opentelemetry.io/otel/sdk/instrumentation
 go.opentelemetry.io/otel/sdk/internal/env
 go.opentelemetry.io/otel/sdk/internal/x
 go.opentelemetry.io/otel/sdk/resource
 go.opentelemetry.io/otel/sdk/trace
-# go.opentelemetry.io/otel/sdk/metric v1.29.0
-## explicit; go 1.21
+# go.opentelemetry.io/otel/sdk/metric v1.32.0
+## explicit; go 1.22
 go.opentelemetry.io/otel/sdk/metric
+go.opentelemetry.io/otel/sdk/metric/exemplar
 go.opentelemetry.io/otel/sdk/metric/internal
 go.opentelemetry.io/otel/sdk/metric/internal/aggregate
-go.opentelemetry.io/otel/sdk/metric/internal/exemplar
 go.opentelemetry.io/otel/sdk/metric/internal/x
 go.opentelemetry.io/otel/sdk/metric/metricdata
-# go.opentelemetry.io/otel/trace v1.29.0
-## explicit; go 1.21
+# go.opentelemetry.io/otel/trace v1.32.0
+## explicit; go 1.22
 go.opentelemetry.io/otel/trace
 go.opentelemetry.io/otel/trace/embedded
 go.opentelemetry.io/otel/trace/noop
@@ -640,7 +654,7 @@ go.opentelemetry.io/proto/otlp/trace/v1
 # go.uber.org/atomic v1.9.0
 ## explicit; go 1.13
 go.uber.org/atomic
-# golang.org/x/crypto v0.26.0
+# golang.org/x/crypto v0.31.0
 ## explicit; go 1.20
 golang.org/x/crypto/argon2
 golang.org/x/crypto/blake2b
@@ -650,7 +664,7 @@ golang.org/x/crypto/curve25519
 # golang.org/x/exp v0.0.0-20240506185415-9bf2ced13842
 ## explicit; go 1.20
 golang.org/x/exp/constraints
-# golang.org/x/net v0.28.0
+# golang.org/x/net v0.30.0
 ## explicit; go 1.18
 golang.org/x/net/context
 golang.org/x/net/html
@@ -666,22 +680,22 @@ golang.org/x/net/proxy
 golang.org/x/net/publicsuffix
 golang.org/x/net/trace
 golang.org/x/net/websocket
-# golang.org/x/oauth2 v0.22.0
+# golang.org/x/oauth2 v0.23.0
 ## explicit; go 1.18
 golang.org/x/oauth2
 golang.org/x/oauth2/clientcredentials
 golang.org/x/oauth2/internal
-# golang.org/x/sys v0.26.0
+# golang.org/x/sys v0.28.0
 ## explicit; go 1.18
 golang.org/x/sys/cpu
 golang.org/x/sys/plan9
 golang.org/x/sys/unix
 golang.org/x/sys/windows
 golang.org/x/sys/windows/registry
-# golang.org/x/term v0.23.0
+# golang.org/x/term v0.27.0
 ## explicit; go 1.18
 golang.org/x/term
-# golang.org/x/text v0.17.0
+# golang.org/x/text v0.21.0
 ## explicit; go 1.18
 golang.org/x/text/secure/bidirule
 golang.org/x/text/transform
@@ -690,10 +704,10 @@ golang.org/x/text/unicode/norm
 # golang.org/x/time v0.5.0
 ## explicit; go 1.18
 golang.org/x/time/rate
-# google.golang.org/genproto/googleapis/api v0.0.0-20240822170219-fc7c04adadcd
+# google.golang.org/genproto/googleapis/api v0.0.0-20241104194629-dd2ea8efbc28
 ## explicit; go 1.21
 google.golang.org/genproto/googleapis/api/httpbody
-# google.golang.org/genproto/googleapis/rpc v0.0.0-20240822170219-fc7c04adadcd
+# google.golang.org/genproto/googleapis/rpc v0.0.0-20241104194629-dd2ea8efbc28
 ## explicit; go 1.21
 google.golang.org/genproto/googleapis/rpc/errdetails
 google.golang.org/genproto/googleapis/rpc/status
@@ -811,9 +825,6 @@ gopkg.in/gcfg.v1/types
 # gopkg.in/inf.v0 v0.9.1
 ## explicit
 gopkg.in/inf.v0
-# gopkg.in/ini.v1 v1.67.0
-## explicit
-gopkg.in/ini.v1
 # gopkg.in/natefinch/lumberjack.v2 v2.2.1
 ## explicit; go 1.13
 gopkg.in/natefinch/lumberjack.v2
-- 
GitLab