// Command-line tool that decodes a RawTxBlocksBinary (.prbs) file, attributes
// every byte of the file to a named field, and prints size and balance
// statistics about its contents.
package main

import (
	"flag"
	"fmt"
	"math/big"
	"os"
	"sort"

	pump_parser "github.com/thloyi/pump-parser"
)

// sizeStats accumulates byte counts per named field together with their sum.
type sizeStats struct {
	total uint64            // sum of every value passed to add
	items map[string]uint64 // bytes accounted per field name
}

// txInnerDataStat summarizes the inner-instruction data of one transaction.
type txInnerDataStat struct {
	TxOrdinal        int    // position of the transaction in blocks.Txs
	BlockIndex       int    // index into blocks.BlockTxCounts
	IndexWithinBlock uint32 // copied from the transaction
	Slot             uint64 // copied from the transaction
	Bytes            uint64 // total len(Data) over all inner instructions
	InstructionCount int    // number of inner instructions
}

// newSizeStats returns an empty sizeStats with its map initialized.
func newSizeStats() *sizeStats {
	return &sizeStats{items: make(map[string]uint64)}
}

// add attributes n bytes to the field called name and to the running total.
func (s *sizeStats) add(name string, n uint64) {
	s.items[name] += n
	s.total += n
}

// main reads the file named by -file, decodes it, verifies that the size
// accounting matches the file length exactly, and prints the three reports.
// It exits with status 1 on a read error, a decode error, or an accounting
// mismatch.
func main() {
	filePath := flag.String("file", "testdata/rawtx-binary/rawtx-blocks-414696178-414696182.prbs", "path to RawTxBlocksBinary .prbs file")
	flag.Parse()

	raw, err := os.ReadFile(*filePath)
	if err != nil {
		fmt.Fprintf(os.Stderr, "read file: %v\n", err)
		os.Exit(1)
	}

	var blocks pump_parser.RawTxBlocksBinary
	if err := blocks.UnmarshalBinary(raw); err != nil {
		fmt.Fprintf(os.Stderr, "decode rawtx blocks binary: %v\n", err)
		os.Exit(1)
	}

	stats := analyzeRawTxBlocksBinary(&blocks)
	// The per-field widths below are only trustworthy if they add up to the
	// real file size, so a mismatch is treated as a hard failure.
	if stats.total != uint64(len(raw)) {
		fmt.Fprintf(os.Stderr, "size accounting mismatch: accounted=%d file=%d\n", stats.total, len(raw))
		os.Exit(1)
	}

	printReport(*filePath, len(raw), &blocks, stats)
	fmt.Println()
	printInnerInstructionDataDistribution(&blocks)
	fmt.Println()
	printBalanceAnalysis(&blocks)
}

// analyzeRawTxBlocksBinary accounts the bytes of the file-level header fields
// and of every transaction in blocks, returning the accumulated statistics.
func analyzeRawTxBlocksBinary(blocks *pump_parser.RawTxBlocksBinary) *sizeStats {
	stats := newSizeStats()
	stats.add("file.magic", 4)
	stats.add("file.schema_version", 2)
	stats.add("address_table.count", 4)
	stats.add("address_table.pubkeys", uint64(len(blocks.AddressTable))*32)
	stats.add("blocks.count", 4)
	stats.add("blocks.block_time", uint64(len(blocks.BlockTimes))*8)
	stats.add("blocks.tx_count", uint64(len(blocks.BlockTxCounts))*4)
	stats.add("txs.total_count", 4)
	for i := range blocks.Txs {
		addTx(stats, &blocks.Txs[i])
	}
	return stats
}

// addTx accounts the fixed per-transaction fields, the account list, the
// meta section and the transaction section of tx.
func addTx(stats *sizeStats, tx *pump_parser.RawTxBinary) {
	stats.add("tx.index_within_block", 4)
	stats.add("tx.slot", 8)
	stats.add("tx.version", 1)
	stats.add("tx.account_key_count", 4)
	stats.add("tx.account_list.count", 4)
	stats.add("tx.account_list.refs", uint64(len(tx.AccountList))*4)
	addMeta(stats, &tx.Meta)
	addTransaction(stats, &tx.Transaction)
}

// addMeta accounts the meta section: error, fee, inner instructions, lamport
// balances, token balances and compute units, in that order.
func addMeta(stats *sizeStats, meta *pump_parser.RawTxMetaBinary) {
	addErr(stats, meta.Err)
	stats.add("meta.fee", 8)
	addInnerInstructions(stats, meta.InnerInstructions)
	addLamportBalances(stats, meta.PreBalances, meta.PostBalances)
	addTokenBalances(stats, "meta.token_balances", meta.TokenBalances)
	stats.add("meta.compute_units_consumed", 8)
}

// addErr accounts one presence byte, plus 13 bytes of error detail when
// errValue is non-nil.
func addErr(stats *sizeStats, errValue *pump_parser.TransactionParsedError) {
	stats.add("meta.err.present", 1)
	if errValue == nil {
		return
	}
	stats.add("meta.err.index", 1)
	stats.add("meta.err.variant", 4)
	stats.add("meta.err.enum", 4)
	stats.add("meta.err.custom_code", 4)
}

// addTransaction accounts the signature (64 bytes when present, after a
// presence byte), the header, the top-level instructions and the address
// table lookups.
func addTransaction(stats *sizeStats, tx *pump_parser.RawTxTransactionBinary) {
	stats.add("transaction.signature.present", 1)
	if tx.HasSignature {
		stats.add("transaction.signature.first", 64)
	}
	addHeader(stats)
	addInstructions(stats, "transaction.instructions", tx.Message.Instructions)
	addAddressTableLookups(stats, tx.Message.AddressTableLookups)
}

// addHeader accounts the three 4-byte message header counters.
func addHeader(stats *sizeStats) {
	stats.add("transaction.header.num_readonly_signed", 4)
	stats.add("transaction.header.num_readonly_unsigned", 4)
	stats.add("transaction.header.num_required_signatures", 4)
}

// addInnerInstructions accounts a 4-byte count, then for every entry a 4-byte
// index followed by its instruction list.
func addInnerInstructions(stats *sizeStats, values []pump_parser.InnerInstructions) {
	stats.add("meta.inner_instructions.count", 4)
	for _, value := range values {
		stats.add("meta.inner_instructions.index", 4)
		addInstructions(stats, "meta.inner_instructions.instructions", value.Instructions)
	}
}

// addInstructions accounts an instruction list under the given field-name
// prefix: a 4-byte count, then per instruction the program id index, the
// account references (2 bytes each), the data bytes, and an optional 4-byte
// stack height behind a presence byte.
func addInstructions(stats *sizeStats, prefix string, values []pump_parser.Instruction) {
	stats.add(prefix+".count", 4)
	for _, value := range values {
		stats.add(prefix+".program_id_index", 2)
		stats.add(prefix+".accounts.count", 4)
		stats.add(prefix+".accounts.refs", uint64(len(value.Accounts))*2)
		stats.add(prefix+".data.length", 4)
		stats.add(prefix+".data.bytes", uint64(len(value.Data)))
		stats.add(prefix+".stack_height.present", 1)
		if value.StackHeight != nil {
			stats.add(prefix+".stack_height.value", 4)
		}
	}
}

// addAddressTableLookups accounts a 4-byte count, then per lookup a 4-byte
// account key reference and two counted lists of one-byte indexes.
func addAddressTableLookups(stats *sizeStats, values []pump_parser.RawTxAddressTableLookupBinary) {
	stats.add("transaction.address_table_lookups.count", 4)
	for _, value := range values {
		stats.add("transaction.address_table_lookups.account_key", 4)
		stats.add("transaction.address_table_lookups.writable.count", 4)
		stats.add("transaction.address_table_lookups.writable.indexes", uint64(len(value.WritableIndexes)))
		stats.add("transaction.address_table_lookups.readonly.count", 4)
		stats.add("transaction.address_table_lookups.readonly.indexes", uint64(len(value.ReadonlyIndexes)))
	}
}

// addUint64Slice accounts a 4-byte count followed by count fixed 8-byte
// values. It is not referenced elsewhere in the visible source.
func addUint64Slice(stats *sizeStats, prefix string, count int) {
	stats.add(prefix+".count", 4)
	stats.add(prefix+".values", uint64(count)*8)
}

// addLamportBalances accounts the balances as: a uvarint count of pre
// balances, each pre balance as a uvarint, a uvarint count of changed
// positions, and for each changed position a uvarint index plus a zigzag
// uvarint delta. Only positions present in both slices are compared.
func addLamportBalances(stats *sizeStats, preBalances []uint64, postBalances []uint64) {
	stats.add("meta.pre_balances.count_uvarint", uint64(uvarintLen(uint64(len(preBalances)))))
	for _, value := range preBalances {
		stats.add("meta.pre_balances.value_uvarint", uint64(uvarintLen(value)))
	}

	n := len(preBalances)
	if len(postBalances) < n {
		n = len(postBalances)
	}

	changed := 0
	for i := 0; i < n; i++ {
		if preBalances[i] != postBalances[i] {
			changed++
		}
	}
	stats.add("meta.post_balance_changes.count_uvarint", uint64(uvarintLen(uint64(changed))))

	for i := 0; i < n; i++ {
		if preBalances[i] == postBalances[i] {
			continue
		}
		stats.add("meta.post_balance_changes.index_uvarint", uint64(uvarintLen(uint64(i))))
		stats.add("meta.post_balance_changes.delta_uvarint", uint64(zigzagDeltaUvarintLen(preBalances[i], postBalances[i])))
	}
}

// addTokenBalances accounts a token balance list under prefix: a 4-byte
// count, then per entry the fixed references and three optional parts (owner
// reference, pre amount, post amount), each behind a presence byte. Amounts
// are counted as a one-byte length plus uint256ByteLen bytes.
func addTokenBalances(stats *sizeStats, prefix string, values []pump_parser.RawTxTokenBalanceBinary) {
	stats.add(prefix+".count", 4)
	for _, value := range values {
		stats.add(prefix+".account_index", 2)
		stats.add(prefix+".mint_ref", 2)
		stats.add(prefix+".owner.present", 1)
		if value.HasOwnerAccount {
			stats.add(prefix+".owner_ref", 2)
		}
		stats.add(prefix+".program_id_ref", 2)
		stats.add(prefix+".decimals", 1)
		stats.add(prefix+".pre_amount.present", 1)
		if value.HasPreAmount {
			stats.add(prefix+".pre_amount.length", 1)
			stats.add(prefix+".pre_amount.bytes", uint64(uint256ByteLen(value.PreAmount)))
		}
		stats.add(prefix+".post_amount.present", 1)
		if value.HasPostAmount {
			stats.add(prefix+".post_amount.length", 1)
			stats.add(prefix+".post_amount.bytes", uint64(uint256ByteLen(value.PostAmount)))
		}
	}
}

// uint256ByteLen returns the number of bytes in the minimal big-endian
// representation of the base-10 integer in value. It returns 0 for "", "0",
// unparsable input, and non-positive numbers.
func uint256ByteLen(value string) int {
	if value == "" || value == "0" {
		return 0
	}
	amount, ok := new(big.Int).SetString(value, 10)
	if !ok || amount.Sign() <= 0 {
		return 0
	}
	return len(amount.Bytes())
}

// printReport prints a summary header followed by one row per accounted
// field, sorted by descending byte count and then by ascending name. The
// percentage column is relative to fileSize.
func printReport(filePath string, fileSize int, blocks *pump_parser.RawTxBlocksBinary, stats *sizeStats) {
	type row struct {
		name  string
		bytes uint64
	}

	rows := make([]row, 0, len(stats.items))
	for name, bytes := range stats.items {
		rows = append(rows, row{name: name, bytes: bytes})
	}
	sort.Slice(rows, func(i, j int) bool {
		if rows[i].bytes == rows[j].bytes {
			return rows[i].name < rows[j].name
		}
		return rows[i].bytes > rows[j].bytes
	})

	fmt.Printf("file=%s\n", filePath)
	fmt.Printf("bytes=%d\n", fileSize)
	fmt.Printf("schema_version=%d\n", blocks.SchemaVersion)
	fmt.Printf("blocks=%d\n", len(blocks.BlockTxCounts))
	fmt.Printf("txs=%d\n", len(blocks.Txs))
	fmt.Printf("address_table_entries=%d\n", len(blocks.AddressTable))
	fmt.Println()
	fmt.Printf("%-56s %12s %8s\n", "field", "bytes", "pct")
	fmt.Printf("%-56s %12s %8s\n", "-----", "-----", "---")
	for _, row := range rows {
		fmt.Printf("%-56s %12d %7.2f%%\n", row.name, row.bytes, float64(row.bytes)*100/float64(fileSize))
	}
}

// printInnerInstructionDataDistribution prints, for inner-instruction data
// bytes per transaction: totals and average, percentiles, a size histogram,
// and the 20 largest transactions.
func printInnerInstructionDataDistribution(blocks *pump_parser.RawTxBlocksBinary) {
	stats := collectInnerInstructionDataStats(blocks)

	values := make([]uint64, 0, len(stats))
	var total uint64
	var nonZero int
	for _, stat := range stats {
		values = append(values, stat.Bytes)
		total += stat.Bytes
		if stat.Bytes > 0 {
			nonZero++
		}
	}
	// percentile indexes into values, so it must be in ascending order.
	sort.Slice(values, func(i, j int) bool { return values[i] < values[j] })

	fmt.Println("inner_instruction_data_bytes_per_tx")
	fmt.Printf("txs=%d nonzero_txs=%d total_bytes=%d avg=%.2f\n", len(stats), nonZero, total, avg(total, len(stats)))
	if len(values) > 0 {
		fmt.Printf("min=%d p50=%d p75=%d p90=%d p95=%d p99=%d max=%d\n",
			values[0],
			percentile(values, 0.50),
			percentile(values, 0.75),
			percentile(values, 0.90),
			percentile(values, 0.95),
			percentile(values, 0.99),
			values[len(values)-1],
		)
	}

	fmt.Println()
	fmt.Printf("%-16s %8s %8s\n", "bucket", "txs", "bytes")
	fmt.Printf("%-16s %8s %8s\n", "------", "---", "-----")
	for _, bucket := range innerDataBuckets(stats) {
		fmt.Printf("%-16s %8d %8d\n", bucket.label, bucket.count, bucket.bytes)
	}

	// Largest first; ties keep file order via TxOrdinal.
	sort.Slice(stats, func(i, j int) bool {
		if stats[i].Bytes == stats[j].Bytes {
			return stats[i].TxOrdinal < stats[j].TxOrdinal
		}
		return stats[i].Bytes > stats[j].Bytes
	})

	fmt.Println()
	fmt.Printf("%-6s %-5s %-8s %-12s %-8s %-10s\n", "rank", "block", "tx_index", "slot", "bytes", "inner_ix")
	fmt.Printf("%-6s %-5s %-8s %-12s %-8s %-10s\n", "----", "-----", "--------", "----", "-----", "--------")
	limit := 20
	if len(stats) < limit {
		limit = len(stats)
	}
	for i := 0; i < limit; i++ {
		stat := stats[i]
		fmt.Printf("%-6d %-5d %-8d %-12d %-8d %-10d\n", i+1, stat.BlockIndex, stat.IndexWithinBlock, stat.Slot, stat.Bytes, stat.InstructionCount)
	}
}

// collectInnerInstructionDataStats walks blocks.Txs in the order implied by
// blocks.BlockTxCounts and returns one txInnerDataStat per transaction.
// Collection stops early if the counts claim more transactions than
// blocks.Txs holds, instead of indexing past the end of the slice.
func collectInnerInstructionDataStats(blocks *pump_parser.RawTxBlocksBinary) []txInnerDataStat {
	out := make([]txInnerDataStat, 0, len(blocks.Txs))
	txOffset := 0
	for blockIndex, count := range blocks.BlockTxCounts {
		for i := uint32(0); i < count; i++ {
			if txOffset >= len(blocks.Txs) {
				// Inconsistent counts: nothing left to attribute.
				return out
			}
			tx := &blocks.Txs[txOffset]

			var bytes uint64
			var instructionCount int
			for _, inner := range tx.Meta.InnerInstructions {
				for _, instruction := range inner.Instructions {
					bytes += uint64(len(instruction.Data))
					instructionCount++
				}
			}

			out = append(out, txInnerDataStat{
				TxOrdinal:        txOffset,
				BlockIndex:       blockIndex,
				IndexWithinBlock: tx.IndexWithinBlock,
				Slot:             tx.Slot,
				Bytes:            bytes,
				InstructionCount: instructionCount,
			})
			txOffset++
		}
	}
	return out
}

// percentile returns the element at index floor((len(values)-1)*p), or 0 for
// an empty slice. values is expected to be sorted ascending; the caller in
// this file sorts before calling.
func percentile(values []uint64, p float64) uint64 {
	if len(values) == 0 {
		return 0
	}
	idx := int(float64(len(values)-1) * p)
	return values[idx]
}

// avg returns total/count as a float64, or 0 when count is 0.
func avg(total uint64, count int) float64 {
	if count == 0 {
		return 0
	}
	return float64(total) / float64(count)
}

// innerDataBucket is one histogram row: how many transactions fell into the
// labelled size range and how many data bytes they carried in total.
type innerDataBucket struct {
	label string
	count int
	bytes uint64
}

// innerDataBuckets distributes stats into nine size ranges keyed on
// stat.Bytes: exactly 0, then power-of-two ranges from 1-63 up to 4096+.
func innerDataBuckets(stats []txInnerDataStat) []innerDataBucket {
	buckets := []innerDataBucket{
		{label: "0"},
		{label: "1-63"},
		{label: "64-127"},
		{label: "128-255"},
		{label: "256-511"},
		{label: "512-1023"},
		{label: "1024-2047"},
		{label: "2048-4095"},
		{label: "4096+"},
	}
	for _, stat := range stats {
		index := 0
		switch {
		case stat.Bytes == 0:
			index = 0
		case stat.Bytes < 64:
			index = 1
		case stat.Bytes < 128:
			index = 2
		case stat.Bytes < 256:
			index = 3
		case stat.Bytes < 512:
			index = 4
		case stat.Bytes < 1024:
			index = 5
		case stat.Bytes < 2048:
			index = 6
		case stat.Bytes < 4096:
			index = 7
		default:
			index = 8
		}
		buckets[index].count++
		buckets[index].bytes += stat.Bytes
	}
	return buckets
}

// balanceValueStats describes the value distribution of one set of balances.
type balanceValueStats struct {
	name           string
	count          int               // number of values
	unique         int               // number of distinct values
	zeroCount      int               // number of values equal to 0
	topValues      []balanceTopValue // up to 10 most frequent values
	fixedBytes     uint64            // count * 8
	uvarintBytes   uint64            // sum of uvarintLen over all values
	duplicateCount int               // occurrences beyond the first of each value
}

// balanceTopValue pairs a balance value with its number of occurrences.
type balanceTopValue struct {
	value uint64
	count int
}

// balancePairStats aggregates pre/post balance comparisons over all
// transactions, plus byte totals for several candidate encodings.
type balancePairStats struct {
	txCount                 int
	pairCount               int    // positions present in both pre and post
	lengthMismatchTxs       int    // transactions whose pre/post lengths differ
	unchangedCount          int    // pairs with pre == post
	changedCount            int    // pairs with pre != post
	currentFixedValueBytes  uint64 // 8 bytes per pre and per post value
	bothUvarintBytes        uint64 // every pre and post value as a uvarint
	preUvarintPostDelta     uint64 // pre as uvarint, a zigzag delta per pair
	preUvarintChangedDeltas uint64 // pre as uvarint, index+delta per changed pair
}

// printBalanceAnalysis prints value-distribution statistics for pre, post and
// combined balances, followed by byte estimates for alternative encodings
// compared against fixed 8-byte values.
func printBalanceAnalysis(blocks *pump_parser.RawTxBlocksBinary) {
	var preValues []uint64
	var postValues []uint64
	pairs := balancePairStats{}
	pairs.txCount = len(blocks.Txs)

	// Iterate by index so each transaction is read in place, not copied.
	for txIndex := range blocks.Txs {
		meta := &blocks.Txs[txIndex].Meta
		preValues = append(preValues, meta.PreBalances...)
		postValues = append(postValues, meta.PostBalances...)

		preLen := len(meta.PreBalances)
		postLen := len(meta.PostBalances)
		if preLen != postLen {
			pairs.lengthMismatchTxs++
		}
		// Only positions present in both slices can be compared as pairs.
		n := preLen
		if postLen < n {
			n = postLen
		}

		pairs.currentFixedValueBytes += uint64(preLen+postLen) * 8
		pairs.preUvarintChangedDeltas += uint64(uvarintLen(uint64(n)))

		for i := 0; i < preLen; i++ {
			pairs.bothUvarintBytes += uint64(uvarintLen(meta.PreBalances[i]))
			pairs.preUvarintPostDelta += uint64(uvarintLen(meta.PreBalances[i]))
			pairs.preUvarintChangedDeltas += uint64(uvarintLen(meta.PreBalances[i]))
		}
		for i := 0; i < postLen; i++ {
			pairs.bothUvarintBytes += uint64(uvarintLen(meta.PostBalances[i]))
		}
		for i := 0; i < n; i++ {
			pre := meta.PreBalances[i]
			post := meta.PostBalances[i]
			pairs.pairCount++
			pairs.preUvarintPostDelta += uint64(zigzagDeltaUvarintLen(pre, post))
			if pre == post {
				pairs.unchangedCount++
				continue
			}
			pairs.changedCount++
			pairs.preUvarintChangedDeltas += uint64(uvarintLen(uint64(i)))
			pairs.preUvarintChangedDeltas += uint64(zigzagDeltaUvarintLen(pre, post))
		}
	}

	preStats := collectBalanceValueStats("pre_balances", preValues)
	postStats := collectBalanceValueStats("post_balances", postValues)
	// Build the combined set in a fresh slice so preValues is not aliased.
	combined := append(append([]uint64(nil), preValues...), postValues...)
	combinedStats := collectBalanceValueStats("pre+post_balances", combined)

	fmt.Println("balance_values_analysis")
	printBalanceValueStats(preStats)
	printBalanceValueStats(postStats)
	printBalanceValueStats(combinedStats)

	fmt.Println()
	fmt.Println("balance_encoding_estimates")
	fmt.Printf("txs=%d pairs=%d length_mismatch_txs=%d unchanged_pairs=%d changed_pairs=%d unchanged_pct=%.2f%%\n",
		pairs.txCount,
		pairs.pairCount,
		pairs.lengthMismatchTxs,
		pairs.unchangedCount,
		pairs.changedCount,
		float64(pairs.unchangedCount)*100/float64(maxInt(pairs.pairCount, 1)),
	)
	printEstimate("current_fixed_uint64_values", pairs.currentFixedValueBytes, pairs.currentFixedValueBytes)
	printEstimate("both_values_uvarint", pairs.bothUvarintBytes, pairs.currentFixedValueBytes)
	printEstimate("pre_uvarint_post_delta_each_index", pairs.preUvarintPostDelta, pairs.currentFixedValueBytes)
	printEstimate("pre_uvarint_post_changed_delta_pairs", pairs.preUvarintChangedDeltas, pairs.currentFixedValueBytes)
}

// collectBalanceValueStats computes frequency, zero-count and size statistics
// for values. topValues holds at most the 10 most frequent values, ordered by
// descending count and then by ascending value.
func collectBalanceValueStats(name string, values []uint64) balanceValueStats {
	freq := make(map[uint64]int)
	var zeroCount int
	var uvarintBytes uint64
	for _, value := range values {
		freq[value]++
		if value == 0 {
			zeroCount++
		}
		uvarintBytes += uint64(uvarintLen(value))
	}

	top := make([]balanceTopValue, 0, len(freq))
	var duplicateCount int
	for value, count := range freq {
		top = append(top, balanceTopValue{value: value, count: count})
		if count > 1 {
			duplicateCount += count - 1
		}
	}
	sort.Slice(top, func(i, j int) bool {
		if top[i].count == top[j].count {
			return top[i].value < top[j].value
		}
		return top[i].count > top[j].count
	})
	if len(top) > 10 {
		top = top[:10]
	}

	return balanceValueStats{
		name:           name,
		count:          len(values),
		unique:         len(freq),
		zeroCount:      zeroCount,
		topValues:      top,
		fixedBytes:     uint64(len(values)) * 8,
		uvarintBytes:   uvarintBytes,
		duplicateCount: duplicateCount,
	}
}

// printBalanceValueStats prints one summary line for stats followed by a
// table of its top values. Percentages use max(count, 1) as the denominator
// so an empty set prints 0.
func printBalanceValueStats(stats balanceValueStats) {
	fmt.Printf("%s: count=%d unique=%d duplicate_values=%d zero=%d zero_pct=%.2f%% fixed_bytes=%d uvarint_bytes=%d uvarint_saved=%.2f%%\n",
		stats.name,
		stats.count,
		stats.unique,
		stats.duplicateCount,
		stats.zeroCount,
		float64(stats.zeroCount)*100/float64(maxInt(stats.count, 1)),
		stats.fixedBytes,
		stats.uvarintBytes,
		savedPct(stats.fixedBytes, stats.uvarintBytes),
	)
	fmt.Printf("%-22s %-8s %-8s\n", "value", "count", "pct")
	for _, item := range stats.topValues {
		fmt.Printf("%-22d %-8d %7.2f%%\n", item.value, item.count, float64(item.count)*100/float64(maxInt(stats.count, 1)))
	}
}

// printEstimate prints one encoding estimate row: its name, its byte total,
// and the percentage saved relative to baseline.
func printEstimate(name string, bytes uint64, baseline uint64) {
	fmt.Printf("%-38s %10d saved=%7.2f%%\n", name, bytes, savedPct(baseline, bytes))
}

// savedPct returns how much smaller value is than baseline, as a percentage
// of baseline. The result is negative when value is larger, and 0 when
// baseline is 0.
func savedPct(baseline uint64, value uint64) float64 {
	if baseline == 0 {
		return 0
	}
	return (float64(baseline) - float64(value)) * 100 / float64(baseline)
}

// uvarintLen returns the number of 7-bit groups needed to hold value, with a
// minimum of 1.
func uvarintLen(value uint64) int {
	n := 1
	for value >= 0x80 {
		value >>= 7
		n++
	}
	return n
}

// zigzagDeltaUvarintLen returns uvarintLen of the change from pre to post
// after mapping it to an unsigned value: an increase of d maps to 2d and a
// decrease of d maps to 2d-1. The shift is performed in uint64, so a
// difference of 2^63 or more wraps.
func zigzagDeltaUvarintLen(pre uint64, post uint64) int {
	if post >= pre {
		return uvarintLen((post - pre) << 1)
	}
	return uvarintLen(((pre - post) << 1) - 1)
}

// maxInt returns the larger of a and b.
func maxInt(a int, b int) int {
	if a > b {
		return a
	}
	return b
}