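// This program decodes a RawTxBlocksBinary (.prbs) file and prints a per-field
// size breakdown, the distribution of inner-instruction data bytes per
// transaction, and an analysis of lamport balance encodings.
//
// (Repository web-viewer residue that preceded the package clause — "Files",
// "595 lines", "17 KiB", "Go", "Raw Permalink Normal View History",
// "2026-04-24 18:00:44 +08:00" — is kept here only as a comment; it is not Go.)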
package main
import (
"flag"
"fmt"
"math/big"
"os"
"sort"
pump_parser "github.com/thloyi/pump-parser"
)
// sizeStats accumulates byte counts per named field together with their sum.
type sizeStats struct {
	// total is the running sum of every amount recorded through add.
	total uint64
	// items maps a field name to the bytes attributed to it so far.
	items map[string]uint64
}
// txInnerDataStat summarises the inner-instruction data carried by a single
// transaction.
type txInnerDataStat struct {
	// TxOrdinal is the transaction's position in the flat Txs slice;
	// BlockIndex is the position of its block in BlockTxCounts.
	TxOrdinal, BlockIndex int
	// IndexWithinBlock is copied from the transaction record.
	IndexWithinBlock uint32
	// Slot is copied from the transaction record; Bytes is the summed length
	// of the Data of all its inner instructions.
	Slot, Bytes uint64
	// InstructionCount is the number of inner instructions that were summed.
	InstructionCount int
}
// newSizeStats returns an empty accumulator whose items map is ready for
// writes.
func newSizeStats() *sizeStats {
	stats := new(sizeStats)
	stats.items = map[string]uint64{}
	return stats
}
// add attributes n bytes to the field called name and to the grand total.
func (s *sizeStats) add(name string, n uint64) {
	s.total += n
	s.items[name] = s.items[name] + n
}
// main decodes the file named by -file, checks that the per-field accounting
// adds up to exactly the file size, and then prints the size report, the
// inner-instruction data distribution and the balance analysis.
func main() {
	filePath := flag.String("file", "testdata/rawtx-binary/rawtx-blocks-414696178-414696182.prbs", "path to RawTxBlocksBinary .prbs file")
	flag.Parse()

	// die writes a message to stderr and terminates with exit status 1.
	die := func(format string, args ...interface{}) {
		fmt.Fprintf(os.Stderr, format, args...)
		os.Exit(1)
	}

	raw, readErr := os.ReadFile(*filePath)
	if readErr != nil {
		die("read file: %v\n", readErr)
	}

	var blocks pump_parser.RawTxBlocksBinary
	if decodeErr := blocks.UnmarshalBinary(raw); decodeErr != nil {
		die("decode rawtx blocks binary: %v\n", decodeErr)
	}

	// Every byte of the file must be attributed to some field; otherwise the
	// percentages in the report would be misleading.
	stats := analyzeRawTxBlocksBinary(&blocks)
	if accounted := stats.total; accounted != uint64(len(raw)) {
		die("size accounting mismatch: accounted=%d file=%d\n", accounted, len(raw))
	}

	printReport(*filePath, len(raw), &blocks, stats)
	fmt.Println()
	printInnerInstructionDataDistribution(&blocks)
	fmt.Println()
	printBalanceAnalysis(&blocks)
}
// analyzeRawTxBlocksBinary attributes a byte count to every named field of the
// decoded container: first the file-level entries, then each transaction via
// addTx. The returned accumulator holds the per-field totals and their sum.
func analyzeRawTxBlocksBinary(blocks *pump_parser.RawTxBlocksBinary) *sizeStats {
	stats := newSizeStats()

	fileLevel := []struct {
		name  string
		bytes uint64
	}{
		{"file.magic", 4},
		{"file.schema_version", 2},
		{"address_table.count", 4},
		{"address_table.pubkeys", uint64(len(blocks.AddressTable)) * 32},
		{"blocks.count", 4},
		{"blocks.block_time", uint64(len(blocks.BlockTimes)) * 8},
		{"blocks.tx_count", uint64(len(blocks.BlockTxCounts)) * 4},
		{"txs.total_count", 4},
	}
	for _, field := range fileLevel {
		stats.add(field.name, field.bytes)
	}

	// Index the slice so each transaction is passed by pointer, not copied.
	for i := range blocks.Txs {
		addTx(stats, &blocks.Txs[i])
	}
	return stats
}
// addTx accounts for one transaction record: its fixed-width fields, the
// account list (4 bytes per reference), then its meta and transaction parts.
func addTx(stats *sizeStats, tx *pump_parser.RawTxBinary) {
	record := stats.add

	record("tx.index_within_block", 4)
	record("tx.slot", 8)
	record("tx.version", 1)
	record("tx.account_key_count", 4)

	refs := uint64(len(tx.AccountList))
	record("tx.account_list.count", 4)
	record("tx.account_list.refs", refs*4)

	addMeta(stats, &tx.Meta)
	addTransaction(stats, &tx.Transaction)
}
// addMeta accounts for a transaction's meta section: the error, the fee, the
// inner instructions, the lamport balances, the token balances and the
// consumed compute units.
func addMeta(stats *sizeStats, meta *pump_parser.RawTxMetaBinary) {
	const u64Bytes = 8

	addErr(stats, meta.Err)
	stats.add("meta.fee", u64Bytes)

	addInnerInstructions(stats, meta.InnerInstructions)
	addLamportBalances(stats, meta.PreBalances, meta.PostBalances)
	addTokenBalances(stats, "meta.token_balances", meta.TokenBalances)

	stats.add("meta.compute_units_consumed", u64Bytes)
}
// addErr accounts for the transaction error: a 1-byte presence flag is always
// recorded, and the index, variant, enum and custom-code fields are recorded
// only when errValue is non-nil.
func addErr(stats *sizeStats, errValue *pump_parser.TransactionParsedError) {
	stats.add("meta.err.present", 1)
	if errValue != nil {
		payload := []struct {
			name  string
			bytes uint64
		}{
			{"meta.err.index", 1},
			{"meta.err.variant", 4},
			{"meta.err.enum", 4},
			{"meta.err.custom_code", 4},
		}
		for _, field := range payload {
			stats.add(field.name, field.bytes)
		}
	}
}
// addTransaction accounts for the transaction section: a 1-byte signature
// presence flag, 64 signature bytes when HasSignature is set, the header, the
// top-level instructions and the address table lookups.
func addTransaction(stats *sizeStats, tx *pump_parser.RawTxTransactionBinary) {
	var signatureBytes uint64
	if tx.HasSignature {
		signatureBytes = 64
	}

	stats.add("transaction.signature.present", 1)
	// Only create the key when a signature is actually stored.
	if signatureBytes != 0 {
		stats.add("transaction.signature.first", signatureBytes)
	}

	addHeader(stats)

	message := &tx.Message
	addInstructions(stats, "transaction.instructions", message.Instructions)
	addAddressTableLookups(stats, message.AddressTableLookups)
}
// addHeader accounts for the three message-header counters, 4 bytes each.
func addHeader(stats *sizeStats) {
	for _, name := range [...]string{
		"transaction.header.num_readonly_signed",
		"transaction.header.num_readonly_unsigned",
		"transaction.header.num_required_signatures",
	} {
		stats.add(name, 4)
	}
}
// addInnerInstructions accounts for the inner-instruction groups: a 4-byte
// group count, then per group a 4-byte index plus its instruction list.
func addInnerInstructions(stats *sizeStats, values []pump_parser.InnerInstructions) {
	const prefix = "meta.inner_instructions"

	stats.add(prefix+".count", 4)
	for i := range values {
		stats.add(prefix+".index", 4)
		addInstructions(stats, prefix+".instructions", values[i].Instructions)
	}
}
func addInstructions(stats *sizeStats, prefix string, values []pump_parser.Instruction) {
stats.add(prefix+".count", 4)
for _, value := range values {
2026-04-29 17:14:26 +08:00
stats.add(prefix+".program_id_index", 1)
2026-04-24 18:00:44 +08:00
stats.add(prefix+".accounts.count", 4)
2026-04-29 17:14:26 +08:00
stats.add(prefix+".accounts.refs", uint64(len(value.Accounts)))
2026-04-24 18:00:44 +08:00
stats.add(prefix+".data.length", 4)
stats.add(prefix+".data.bytes", uint64(len(value.Data)))
stats.add(prefix+".stack_height.present", 1)
if value.StackHeight != nil {
stats.add(prefix+".stack_height.value", 4)
}
2026-04-29 17:14:26 +08:00
stats.add(prefix+".log_events.count", 4)
for _, event := range value.LogEvents {
stats.add(prefix+".log_events.length", 4)
stats.add(prefix+".log_events.bytes", uint64(len(event)))
}
2026-04-24 18:00:44 +08:00
}
}
// addAddressTableLookups accounts for the lookup list: a 4-byte count, then
// per lookup a 4-byte account key entry and, for both the writable and the
// readonly index lists, a 4-byte count plus one byte per index.
func addAddressTableLookups(stats *sizeStats, values []pump_parser.RawTxAddressTableLookupBinary) {
	const prefix = "transaction.address_table_lookups"

	stats.add(prefix+".count", 4)
	for i := range values {
		writable := uint64(len(values[i].WritableIndexes))
		readonly := uint64(len(values[i].ReadonlyIndexes))

		stats.add(prefix+".account_key", 4)
		stats.add(prefix+".writable.count", 4)
		stats.add(prefix+".writable.indexes", writable)
		stats.add(prefix+".readonly.count", 4)
		stats.add(prefix+".readonly.indexes", readonly)
	}
}
// addUint64Slice accounts for a slice of count values under prefix: a 4-byte
// count plus 8 bytes per value.
func addUint64Slice(stats *sizeStats, prefix string, count int) {
	const (
		countBytes = 4
		valueBytes = 8
	)
	stats.add(prefix+".count", countBytes)
	stats.add(prefix+".values", valueBytes*uint64(count))
}
// addLamportBalances accounts for the balance section: the pre-balance count
// and every pre-balance as uvarints, then the number of changed positions as
// a uvarint and, per changed position, its index and its zigzag delta as
// uvarints. Only positions present in both slices are compared.
func addLamportBalances(stats *sizeStats, preBalances []uint64, postBalances []uint64) {
	stats.add("meta.pre_balances.count_uvarint", uint64(uvarintLen(uint64(len(preBalances)))))
	for i := range preBalances {
		stats.add("meta.pre_balances.value_uvarint", uint64(uvarintLen(preBalances[i])))
	}

	paired := len(postBalances)
	if len(preBalances) < paired {
		paired = len(preBalances)
	}

	// One pass records the per-change entries and counts them; the count
	// field is recorded afterwards, which leaves the accumulated totals the
	// same as counting first.
	var changed uint64
	for i := 0; i < paired; i++ {
		pre, post := preBalances[i], postBalances[i]
		if pre != post {
			changed++
			stats.add("meta.post_balance_changes.index_uvarint", uint64(uvarintLen(uint64(i))))
			stats.add("meta.post_balance_changes.delta_uvarint", uint64(zigzagDeltaUvarintLen(pre, post)))
		}
	}
	stats.add("meta.post_balance_changes.count_uvarint", uint64(uvarintLen(changed)))
}
func addTokenBalances(stats *sizeStats, prefix string, values []pump_parser.RawTxTokenBalanceBinary) {
stats.add(prefix+".count", 4)
for _, value := range values {
2026-04-29 17:14:26 +08:00
stats.add(prefix+".account_index", 1)
stats.add(prefix+".mint_ref", 1)
2026-04-24 18:00:44 +08:00
stats.add(prefix+".owner.present", 1)
if value.HasOwnerAccount {
2026-04-29 17:14:26 +08:00
stats.add(prefix+".owner_ref", 1)
2026-04-24 18:00:44 +08:00
}
2026-04-29 17:14:26 +08:00
stats.add(prefix+".program_id_ref", 1)
2026-04-24 18:00:44 +08:00
stats.add(prefix+".decimals", 1)
stats.add(prefix+".pre_amount.present", 1)
if value.HasPreAmount {
stats.add(prefix+".pre_amount.length", 1)
stats.add(prefix+".pre_amount.bytes", uint64(uint256ByteLen(value.PreAmount)))
}
stats.add(prefix+".post_amount.present", 1)
if value.HasPostAmount {
stats.add(prefix+".post_amount.length", 1)
stats.add(prefix+".post_amount.bytes", uint64(uint256ByteLen(value.PostAmount)))
}
}
}
// uint256ByteLen returns how many bytes the big-endian representation of the
// base-10 number in value occupies. Empty, zero, negative and unparsable
// inputs all yield 0.
func uint256ByteLen(value string) int {
	switch value {
	case "", "0":
		return 0
	}

	parsed, ok := new(big.Int).SetString(value, 10)
	if ok && parsed.Sign() > 0 {
		return len(parsed.Bytes())
	}
	return 0
}
// printReport writes the summary header followed by one line per accounted
// field, ordered by descending byte count and then by name, each with its
// share of fileSize as a percentage.
func printReport(filePath string, fileSize int, blocks *pump_parser.RawTxBlocksBinary, stats *sizeStats) {
	type entry struct {
		field string
		size  uint64
	}

	entries := make([]entry, 0, len(stats.items))
	for field, size := range stats.items {
		entries = append(entries, entry{field: field, size: size})
	}
	// Map iteration order is random, so the name tie-break keeps the output
	// deterministic.
	sort.Slice(entries, func(a, b int) bool {
		left, right := entries[a], entries[b]
		if left.size != right.size {
			return left.size > right.size
		}
		return left.field < right.field
	})

	fmt.Printf("file=%s\n", filePath)
	fmt.Printf("bytes=%d\n", fileSize)
	fmt.Printf("schema_version=%d\n", blocks.SchemaVersion)
	fmt.Printf("blocks=%d\n", len(blocks.BlockTxCounts))
	fmt.Printf("txs=%d\n", len(blocks.Txs))
	fmt.Printf("address_table_entries=%d\n", len(blocks.AddressTable))
	fmt.Println()

	const headerFormat = "%-56s %12s %8s\n"
	fmt.Printf(headerFormat, "field", "bytes", "pct")
	fmt.Printf(headerFormat, "-----", "-----", "---")

	whole := float64(fileSize)
	for _, e := range entries {
		fmt.Printf("%-56s %12d %7.2f%%\n", e.field, e.size, float64(e.size)*100/whole)
	}
}
// printInnerInstructionDataDistribution prints, for inner-instruction data
// bytes per transaction: overall totals, selected percentiles, a bucketed
// histogram, and the 20 transactions carrying the most bytes.
func printInnerInstructionDataDistribution(blocks *pump_parser.RawTxBlocksBinary) {
	perTx := collectInnerInstructionDataStats(blocks)

	sizes := make([]uint64, len(perTx))
	var (
		totalBytes uint64
		nonZeroTxs int
	)
	for i := range perTx {
		size := perTx[i].Bytes
		sizes[i] = size
		totalBytes += size
		if size != 0 {
			nonZeroTxs++
		}
	}
	// percentile expects ascending order.
	sort.Slice(sizes, func(a, b int) bool { return sizes[a] < sizes[b] })

	fmt.Println("inner_instruction_data_bytes_per_tx")
	fmt.Printf("txs=%d nonzero_txs=%d total_bytes=%d avg=%.2f\n", len(perTx), nonZeroTxs, totalBytes, avg(totalBytes, len(perTx)))
	if last := len(sizes) - 1; last >= 0 {
		fmt.Printf("min=%d p50=%d p75=%d p90=%d p95=%d p99=%d max=%d\n",
			sizes[0],
			percentile(sizes, 0.50),
			percentile(sizes, 0.75),
			percentile(sizes, 0.90),
			percentile(sizes, 0.95),
			percentile(sizes, 0.99),
			sizes[last],
		)
	}

	fmt.Println()
	const bucketHeader = "%-16s %8s %8s\n"
	fmt.Printf(bucketHeader, "bucket", "txs", "bytes")
	fmt.Printf(bucketHeader, "------", "---", "-----")
	for _, b := range innerDataBuckets(perTx) {
		fmt.Printf("%-16s %8d %8d\n", b.label, b.count, b.bytes)
	}

	// Largest first; ties keep the original transaction order.
	sort.Slice(perTx, func(a, b int) bool {
		if perTx[a].Bytes != perTx[b].Bytes {
			return perTx[a].Bytes > perTx[b].Bytes
		}
		return perTx[a].TxOrdinal < perTx[b].TxOrdinal
	})

	fmt.Println()
	const topHeader = "%-6s %-5s %-8s %-12s %-8s %-10s\n"
	fmt.Printf(topHeader, "rank", "block", "tx_index", "slot", "bytes", "inner_ix")
	fmt.Printf(topHeader, "----", "-----", "--------", "----", "-----", "--------")

	top := perTx
	if len(top) > 20 {
		top = top[:20]
	}
	for rank, stat := range top {
		fmt.Printf("%-6d %-5d %-8d %-12d %-8d %-10d\n", rank+1, stat.BlockIndex, stat.IndexWithinBlock, stat.Slot, stat.Bytes, stat.InstructionCount)
	}
}
// collectInnerInstructionDataStats walks the transactions block by block (as
// described by BlockTxCounts) and returns, for each one, the summed length of
// its inner-instruction Data together with the number of inner instructions.
//
// Transactions are taken from the flat Txs slice in order. If BlockTxCounts
// claims more transactions than Txs holds, collection stops at the end of Txs
// instead of indexing past it; transactions beyond the counted total are not
// reported.
func collectInnerInstructionDataStats(blocks *pump_parser.RawTxBlocksBinary) []txInnerDataStat {
	out := make([]txInnerDataStat, 0, len(blocks.Txs))
	txOffset := 0
	for blockIndex, count := range blocks.BlockTxCounts {
		for i := uint32(0); i < count; i++ {
			// Guard against per-block counts that disagree with the
			// number of decoded transactions.
			if txOffset >= len(blocks.Txs) {
				return out
			}
			tx := &blocks.Txs[txOffset]

			var bytes uint64
			var instructionCount int
			for _, inner := range tx.Meta.InnerInstructions {
				for _, instruction := range inner.Instructions {
					bytes += uint64(len(instruction.Data))
					instructionCount++
				}
			}

			out = append(out, txInnerDataStat{
				TxOrdinal:        txOffset,
				BlockIndex:       blockIndex,
				IndexWithinBlock: tx.IndexWithinBlock,
				Slot:             tx.Slot,
				Bytes:            bytes,
				InstructionCount: instructionCount,
			})
			txOffset++
		}
	}
	return out
}
// percentile returns the element of values at fraction p of the way through
// the slice, using the index int((len(values)-1) * p) with no interpolation.
// values is expected to be sorted in ascending order.
//
// An empty slice yields 0. p is clamped to [0, 1] (NaN is treated as 0) so an
// out-of-range fraction selects the first or last element instead of
// producing an out-of-bounds index.
func percentile(values []uint64, p float64) uint64 {
	if len(values) == 0 {
		return 0
	}
	// The negated comparison also catches NaN, for which p > 0 is false.
	if !(p > 0) {
		p = 0
	}
	if p > 1 {
		p = 1
	}
	idx := int(float64(len(values)-1) * p)
	return values[idx]
}
// avg returns total divided by count as a float, or 0 when count is zero.
func avg(total uint64, count int) float64 {
	if count != 0 {
		return float64(total) / float64(count)
	}
	return 0
}
// innerDataBucket is one row of the inner-instruction data histogram.
type innerDataBucket struct {
	// label is the printed name of the byte range.
	label string
	// count is the number of transactions that fell into the range.
	count int
	// bytes is the summed byte count of those transactions.
	bytes uint64
}
// innerDataBuckets distributes the per-transaction byte counts over nine
// fixed ranges (0, 1-63, 64-127, ... 2048-4095, 4096+) and returns, for each
// range, the number of transactions and their summed bytes.
func innerDataBuckets(stats []txInnerDataStat) []innerDataBucket {
	// limits[k] is the exclusive upper bound of bucket k; anything at or
	// above the last limit lands in the final, open-ended bucket.
	limits := [...]uint64{1, 64, 128, 256, 512, 1024, 2048, 4096}
	labels := [...]string{
		"0",
		"1-63",
		"64-127",
		"128-255",
		"256-511",
		"512-1023",
		"1024-2047",
		"2048-4095",
		"4096+",
	}

	buckets := make([]innerDataBucket, len(labels))
	for k, label := range labels {
		buckets[k].label = label
	}

	for s := range stats {
		size := stats[s].Bytes
		slot := len(limits)
		for k, limit := range limits {
			if size < limit {
				slot = k
				break
			}
		}
		buckets[slot].count++
		buckets[slot].bytes += size
	}
	return buckets
}
// balanceValueStats summarises a collection of balance values.
type balanceValueStats struct {
	// name labels the collection in the printed report.
	name string
	// count is the number of values, unique the number of distinct values,
	// and zeroCount the number of values equal to zero.
	count, unique, zeroCount int
	// topValues holds the most frequent values, most frequent first.
	topValues []balanceTopValue
	// fixedBytes is 8 bytes per value; uvarintBytes is the summed uvarint
	// length of every value.
	fixedBytes, uvarintBytes uint64
	// duplicateCount is the number of values that repeat an earlier one.
	duplicateCount int
}
// balanceTopValue pairs a balance value with how often it occurred.
type balanceTopValue struct {
	// value is the balance itself.
	value uint64
	// count is its number of occurrences.
	count int
}
// balancePairStats collects counts over pre/post balance pairs and the byte
// cost of several candidate encodings for them.
type balancePairStats struct {
	// txCount is the number of transactions examined and pairCount the
	// number of positions present in both the pre and post lists.
	// lengthMismatchTxs counts transactions whose two lists differ in
	// length; unchangedCount and changedCount split the pairs by whether
	// pre equals post.
	txCount, pairCount, lengthMismatchTxs, unchangedCount, changedCount int

	// currentFixedValueBytes: 8 bytes for every pre and post value.
	// bothUvarintBytes: every pre and post value as a uvarint.
	// preUvarintPostDelta: pre values as uvarints plus a zigzag delta for
	// every pair.
	// preUvarintChangedDeltas: pre values as uvarints plus a per-transaction
	// count and an (index, zigzag delta) entry for changed pairs only.
	currentFixedValueBytes, bothUvarintBytes, preUvarintPostDelta, preUvarintChangedDeltas uint64
}
// printBalanceAnalysis prints value statistics for the pre balances, the post
// balances and both combined, followed by size estimates for four ways of
// encoding them, each compared against 8 fixed bytes per value.
func printBalanceAnalysis(blocks *pump_parser.RawTxBlocksBinary) {
	var allPre, allPost []uint64
	est := balancePairStats{txCount: len(blocks.Txs)}

	for t := range blocks.Txs {
		pre := blocks.Txs[t].Meta.PreBalances
		post := blocks.Txs[t].Meta.PostBalances
		allPre = append(allPre, pre...)
		allPost = append(allPost, post...)

		if len(pre) != len(post) {
			est.lengthMismatchTxs++
		}
		// Only positions present in both lists can be compared.
		paired := len(pre)
		if len(post) < paired {
			paired = len(post)
		}

		est.currentFixedValueBytes += uint64(len(pre)+len(post)) * 8
		est.preUvarintChangedDeltas += uint64(uvarintLen(uint64(paired)))

		// Every estimate except the fixed-width one stores the pre value
		// as a uvarint.
		for _, value := range pre {
			width := uint64(uvarintLen(value))
			est.bothUvarintBytes += width
			est.preUvarintPostDelta += width
			est.preUvarintChangedDeltas += width
		}
		for _, value := range post {
			est.bothUvarintBytes += uint64(uvarintLen(value))
		}

		for i := 0; i < paired; i++ {
			before, after := pre[i], post[i]
			deltaWidth := uint64(zigzagDeltaUvarintLen(before, after))

			est.pairCount++
			est.preUvarintPostDelta += deltaWidth
			if before == after {
				est.unchangedCount++
				continue
			}
			est.changedCount++
			est.preUvarintChangedDeltas += uint64(uvarintLen(uint64(i)))
			est.preUvarintChangedDeltas += deltaWidth
		}
	}

	both := make([]uint64, 0, len(allPre)+len(allPost))
	both = append(both, allPre...)
	both = append(both, allPost...)

	fmt.Println("balance_values_analysis")
	printBalanceValueStats(collectBalanceValueStats("pre_balances", allPre))
	printBalanceValueStats(collectBalanceValueStats("post_balances", allPost))
	printBalanceValueStats(collectBalanceValueStats("pre+post_balances", both))

	fmt.Println()
	fmt.Println("balance_encoding_estimates")
	unchangedPct := float64(est.unchangedCount) * 100 / float64(maxInt(est.pairCount, 1))
	fmt.Printf("txs=%d pairs=%d length_mismatch_txs=%d unchanged_pairs=%d changed_pairs=%d unchanged_pct=%.2f%%\n",
		est.txCount,
		est.pairCount,
		est.lengthMismatchTxs,
		est.unchangedCount,
		est.changedCount,
		unchangedPct,
	)

	baseline := est.currentFixedValueBytes
	printEstimate("current_fixed_uint64_values", baseline, baseline)
	printEstimate("both_values_uvarint", est.bothUvarintBytes, baseline)
	printEstimate("pre_uvarint_post_delta_each_index", est.preUvarintPostDelta, baseline)
	printEstimate("pre_uvarint_post_changed_delta_pairs", est.preUvarintChangedDeltas, baseline)
}
// collectBalanceValueStats computes frequency-based statistics for values:
// the total, distinct, duplicate and zero counts, the fixed-width and uvarint
// byte costs, and up to ten of the most frequent values (ties ordered by
// ascending value).
func collectBalanceValueStats(name string, values []uint64) balanceValueStats {
	occurrences := make(map[uint64]int)
	var varintTotal uint64
	for i := range values {
		occurrences[values[i]]++
		varintTotal += uint64(uvarintLen(values[i]))
	}

	ranked := make([]balanceTopValue, 0, len(occurrences))
	for value, count := range occurrences {
		ranked = append(ranked, balanceTopValue{value: value, count: count})
	}
	sort.Slice(ranked, func(a, b int) bool {
		if ranked[a].count != ranked[b].count {
			return ranked[a].count > ranked[b].count
		}
		return ranked[a].value < ranked[b].value
	})
	if len(ranked) > 10 {
		ranked = ranked[:10]
	}

	return balanceValueStats{
		name:   name,
		count:  len(values),
		unique: len(occurrences),
		// Reading a missing key yields 0 and does not insert it.
		zeroCount:    occurrences[0],
		topValues:    ranked,
		fixedBytes:   uint64(len(values)) * 8,
		uvarintBytes: varintTotal,
		// Each distinct value contributes (occurrences - 1) duplicates,
		// which sums to total minus distinct.
		duplicateCount: len(values) - len(occurrences),
	}
}
// printBalanceValueStats prints the one-line summary for stats followed by a
// small table of its most frequent values with their share of all values.
func printBalanceValueStats(stats balanceValueStats) {
	// Use at least 1 as the divisor so an empty collection prints 0%.
	divisor := float64(maxInt(stats.count, 1))
	zeroPct := float64(stats.zeroCount) * 100 / divisor

	fmt.Printf("%s: count=%d unique=%d duplicate_values=%d zero=%d zero_pct=%.2f%% fixed_bytes=%d uvarint_bytes=%d uvarint_saved=%.2f%%\n",
		stats.name,
		stats.count,
		stats.unique,
		stats.duplicateCount,
		stats.zeroCount,
		zeroPct,
		stats.fixedBytes,
		stats.uvarintBytes,
		savedPct(stats.fixedBytes, stats.uvarintBytes),
	)

	fmt.Printf("%-22s %-8s %-8s\n", "value", "count", "pct")
	for i := range stats.topValues {
		top := stats.topValues[i]
		fmt.Printf("%-22d %-8d %7.2f%%\n", top.value, top.count, float64(top.count)*100/divisor)
	}
}
// printEstimate prints one encoding estimate: its name, its size in bytes and
// the percentage saved relative to baseline.
func printEstimate(name string, bytes uint64, baseline uint64) {
	saved := savedPct(baseline, bytes)
	fmt.Printf("%-38s %10d saved=%7.2f%%\n", name, bytes, saved)
}
// savedPct returns by how many percent value is smaller than baseline. The
// result is negative when value is larger, and 0 when baseline is zero.
func savedPct(baseline uint64, value uint64) float64 {
	if baseline != 0 {
		base := float64(baseline)
		return (base - float64(value)) * 100 / base
	}
	return 0
}
// uvarintLen returns the number of 7-bit groups needed to hold value, with a
// minimum of one (so zero still takes one byte).
func uvarintLen(value uint64) int {
	length := 1
	for rest := value >> 7; rest != 0; rest >>= 7 {
		length++
	}
	return length
}
// zigzagDeltaUvarintLen returns the uvarint length of the zigzag-style code
// for the change from pre to post: a non-negative change d maps to 2*d and a
// negative change of magnitude d maps to 2*d-1.
// NOTE(review): the shift wraps for magnitudes of 2^63 or more — confirm such
// deltas cannot occur in the input.
func zigzagDeltaUvarintLen(pre uint64, post uint64) int {
	if post < pre {
		magnitude := pre - post
		return uvarintLen((magnitude << 1) - 1)
	}
	magnitude := post - pre
	return uvarintLen(magnitude << 1)
}
// maxInt returns the larger of a and b.
func maxInt(a int, b int) int {
	if b >= a {
		return b
	}
	return a
}