Submit Info #64979

Problem Lang User Status Time Memory
Polynomial Taylor Shift cpp anonymous AC 91 ms 20.33 MiB

ケース詳細
Name Status Time Memory
example_00 AC 1 ms 2.32 MiB
example_01 AC 1 ms 2.32 MiB
fft_killer_00 AC 91 ms 20.23 MiB
fft_killer_01 AC 90 ms 20.33 MiB
max_random_00 AC 90 ms 20.28 MiB
max_random_01 AC 90 ms 20.32 MiB
medium_00 AC 1 ms 2.32 MiB
medium_01 AC 2 ms 2.57 MiB
medium_02 AC 2 ms 2.47 MiB
medium_all_zero_00 AC 1 ms 2.32 MiB
medium_c_zero_00 AC 1 ms 2.32 MiB
random_00 AC 81 ms 17.75 MiB
random_01 AC 86 ms 19.11 MiB
random_02 AC 11 ms 4.32 MiB
small_00 AC 1 ms 2.32 MiB
small_01 AC 1 ms 2.32 MiB
small_02 AC 1 ms 2.32 MiB
small_03 AC 1 ms 2.32 MiB
small_04 AC 1 ms 2.32 MiB
small_05 AC 1 ms 2.32 MiB
small_06 AC 1 ms 2.32 MiB
small_07 AC 1 ms 2.32 MiB
small_08 AC 1 ms 2.32 MiB
small_09 AC 1 ms 2.32 MiB
small_10 AC 1 ms 2.32 MiB
small_11 AC 1 ms 2.32 MiB
small_12 AC 1 ms 2.32 MiB
small_13 AC 1 ms 2.32 MiB
small_14 AC 1 ms 2.32 MiB
small_15 AC 1 ms 2.32 MiB

/*
 * Polynomial Taylor shift modulo 998244353.
 *
 * Reads N, c and the N coefficients a_0..a_{N-1} from stdin and prints the
 * coefficients b_0..b_{N-1} of the shifted polynomial, where
 *     b_i = (1 / i!) * sum_{j >= i} a_j * j! * c^(j-i) / (j-i)!   (mod 998244353).
 * The sum is evaluated with one NTT-based convolution; all modular arithmetic
 * is done in 32-bit Montgomery form.
 *
 * The file is written so that it builds both as C and as C++ (hence the
 * explicit casts on calloc results).
 */

#pragma region opt
#pragma GCC target("avx2")
#pragma GCC optimize("O3")
// #pragma GCC optimize("unroll-loops")
// #pragma GCC optimize("fast-math")
#pragma endregion opt

#pragma region header
/* Needed for getchar_unlocked / putchar_unlocked; g++ predefines it. */
#ifndef _GNU_SOURCE
#define _GNU_SOURCE
#endif
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <limits.h>
#include <math.h>
#include <string.h>
#include <time.h>
#pragma endregion header

#pragma region type
/* signed integer */
typedef int8_t i8;
typedef int16_t i16;
typedef int32_t i32;
typedef int64_t i64;
typedef __int128_t i128;
/* unsigned integer */
typedef uint8_t u8;
typedef uint16_t u16;
typedef uint32_t u32;
typedef uint64_t u64;
typedef __uint128_t u128;
/* floating point number */
typedef float f32;
typedef double f64;
typedef long double f80;
#pragma endregion type

#pragma region macro
/* NOTE: these macros evaluate their arguments more than once. */
#define MIN(a, b) (((a) < (b)) ? (a) : (b))
#define MAX(a, b) (((a) > (b)) ? (a) : (b))
#define SWAP(a, b) (((a) ^= (b)), ((b) ^= (a)), ((a) ^= (b)))
#define POPCNT32(a) __builtin_popcount((a))
#define POPCNT64(a) __builtin_popcountll((a))
#define CTZ32(a) __builtin_ctz((a))
#define CLZ32(a) __builtin_clz((a))
#define CTZ64(a) __builtin_ctzll((a))
#define CLZ64(a) __builtin_clzll((a))
#define HAS_SINGLE_BIT32(a) (__builtin_popcount((a)) == (1))
#define HAS_SINGLE_BIT64(a) (__builtin_popcountll((a)) == (1))
#define MSB32(a) ((31) - __builtin_clz((a)))
#define MSB64(a) ((63) - __builtin_clzll((a)))
#define BIT_WIDTH32(a) ((a) ? ((32) - __builtin_clz((a))) : (0))
#define BIT_WIDTH64(a) ((a) ? ((64) - __builtin_clzll((a))) : (0))
#define LSBit(a) ((a) & (-(a)))
#define CLSBit(a) ((a) & ((a) - (1)))
/* Smallest power of two >= a (1 for a == 0). */
#define BIT_CEIL32(a) ((!(a)) ? (1) : ((POPCNT32(a)) == (1) ? ((1u) << ((31) - CLZ32((a)))) : ((1u) << ((32) - CLZ32(a)))))
#define BIT_CEIL64(a) ((!(a)) ? (1) : ((POPCNT64(a)) == (1) ? ((1ull) << ((63) - CLZ64((a)))) : ((1ull) << ((64) - CLZ64(a)))))
/* Largest power of two <= a (0 for a == 0). */
#define BIT_FLOOR32(a) ((!(a)) ? (0) : ((1u) << ((31) - CLZ32((a)))))
#define BIT_FLOOR64(a) ((!(a)) ? (0) : ((1ull) << ((63) - CLZ64((a)))))
#define _ROTL64(x, s) (((x) << ((s) % (64))) | (((x) >> ((64) - ((s) % (64))))))
#define _ROTR64(x, s) (((x) >> ((s) % (64))) | (((x) << ((64) - ((s) % (64))))))
#define ROTL64(x, s) (((s) == (0)) ? (x) : ((((i128)(s)) < (0)) ? (_ROTR64((x), -(s))) : (_ROTL64((x), (s)))))
#define ROTR64(x, s) (((s) == (0)) ? (x) : ((((i128)(s)) < (0)) ? (_ROTL64((x), -(s))) : (_ROTR64((x), (s)))))
#pragma endregion macro

#pragma region io
/*
 * Token readers. Each one skips bytes until an ASCII digit ('0' = 48 ..
 * '9' = 57) is seen, then accumulates consecutive digits. The signed
 * variants flip the sign for every '-' (45) met while skipping.
 * If end of input is reached before any digit, 0 is returned instead of
 * spinning forever. No overflow checking is performed.
 */
static inline int read_int(void) {
    int c, x = 0, f = 1;
    while (c = getchar_unlocked(), c < 48 || c > 57) {
        if (c == EOF) return 0;
        if (c == 45) f = -f;
    }
    while (47 < c && c < 58) {
        x = x * 10 + c - 48;
        c = getchar_unlocked();
    }
    return f * x;
}

static inline i32 in_i32(void) {
    int c;
    i32 x = 0, f = 1;
    while (c = getchar_unlocked(), c < 48 || c > 57) {
        if (c == EOF) return 0;
        if (c == 45) f = -f;
    }
    while (47 < c && c < 58) {
        x = x * 10 + c - 48;
        c = getchar_unlocked();
    }
    return f * x;
}

static inline u32 in_u32(void) {
    /* c is int (not u32) so that EOF stays distinguishable. */
    int c;
    u32 x = 0;
    while (c = getchar_unlocked(), c < 48 || c > 57) {
        if (c == EOF) return 0;
    }
    while (47 < c && c < 58) {
        x = x * 10 + c - 48;
        c = getchar_unlocked();
    }
    return x;
}

static inline i64 in_i64(void) {
    int c;
    i64 x = 0, f = 1;
    while (c = getchar_unlocked(), c < 48 || c > 57) {
        if (c == EOF) return 0;
        if (c == 45) f = -f;
    }
    while (47 < c && c < 58) {
        x = x * 10 + c - 48;
        c = getchar_unlocked();
    }
    return f * x;
}

static inline u64 in_u64(void) {
    int c;
    u64 x = 0;
    while (c = getchar_unlocked(), c < 48 || c > 57) {
        if (c == EOF) return 0;
    }
    while (47 < c && c < 58) {
        x = x * 10 + c - 48;
        c = getchar_unlocked();
    }
    return x;
}

/* Decimal writers: recurse on x / 10, then emit the last digit. */
static inline void write_int_inner(int x) {
    if (x >= 10) write_int_inner(x / 10);
    putchar_unlocked(x - x / 10 * 10 + 48);
}

static inline void write_int(int x) {
    if (x < 0) {
        putchar_unlocked('-');
        x = -x;
    }
    write_int_inner(x);
}

static inline void out_i32_inner(i32 x) {
    if (x >= 10) out_i32_inner(x / 10);
    putchar_unlocked(x - x / 10 * 10 + 48);
}

static inline void out_i32(i32 x) {
    if (x < 0) {
        putchar_unlocked('-');
        x = -x;
    }
    out_i32_inner(x);
}

static inline void out_u32(u32 x) {
    if (x >= 10) out_u32(x / 10);
    putchar_unlocked(x - x / 10 * 10 + 48);
}

static inline void out_i64_inner(i64 x) {
    if (x >= 10) out_i64_inner(x / 10);
    putchar_unlocked(x - x / 10 * 10 + 48);
}

static inline void out_i64(i64 x) {
    if (x < 0) {
        putchar_unlocked('-');
        x = -x;
    }
    out_i64_inner(x);
}

static inline void out_u64(u64 x) {
    if (x >= 10) out_u64(x / 10);
    putchar_unlocked(x - x / 10 * 10 + 48);
}

static inline void NL(void) { putchar_unlocked('\n'); }
static inline void SP(void) { putchar_unlocked(' '); }
#pragma endregion io

#pragma region montgomery_32bit
/* A residue stored in Montgomery form with R = 2^32. */
typedef uint32_t m32;

/* (2^32 - 1) % mod + 1: the Montgomery representation of 1. */
m32 _one_m32(u32 mod) { return -1u % mod + 1u; }

/* (2^64 - 1) % mod + 1: R^2 reduced, used to convert into Montgomery form. */
m32 _r2_m32(u32 mod) { return (u64)(i64)-1 % mod + 1u; }

/* Newton iteration for the inverse of an odd `mod` modulo 2^32. */
m32 _inv_m32(u32 mod) {
    u32 inv = mod;
    for (int i = 0; i < 4; i++) inv *= 2 - mod * inv;
    return inv;
}

/*
 * Montgomery reduction: combines the high word of `a` with the high word of
 * ((low32(a) * inv) * mod) and folds a negative difference back by adding
 * `mod`. `inv` must be the value produced by _inv_m32(mod).
 */
m32 _reduce_m32(u64 a, m32 inv, u32 mod) {
    i64 z = (a >> 32) - ((((u32)a * inv) * (u64)mod) >> 32);
    return z < 0 ? z + mod : z;
}

/* Plain integer -> Montgomery form (multiply by R^2, then reduce). */
m32 to_m32(u32 a, m32 r2, m32 inv, u32 mod) {
    return _reduce_m32((u64)a * r2, inv, mod);
}

/* Montgomery form -> plain integer. */
u32 from_m32(m32 A, m32 inv, u32 mod) {
    m32 t = _reduce_m32((u64)A, inv, mod) - mod;
    /* If the subtraction wrapped (top bit set), add mod back. */
    return t + (mod & -(t >> 31u));
}

/* A + B, corrected by mod2 (= 2 * mod) whenever the top bit is set. */
m32 add_m32(m32 A, m32 B, u32 mod2) {
    // assert(mod2 == (mod << 1));
    A += B - mod2;
    A += mod2 & -(A >> 31u);
    return A;
}

/* A - B, corrected by mod2 (= 2 * mod) whenever the top bit is set. */
m32 sub_m32(m32 A, m32 B, u32 mod2) {
    // assert(mod2 == (mod << 1));
    A -= B;
    A += mod2 & -(A >> 31u);
    return A;
}

/* Additive inverse (0 - A). */
m32 min_m32(m32 A, u32 mod2) {
    // assert(mod2 == (mod << 1));
    return sub_m32(0u, A, mod2);
}

/* Montgomery product of A and B. */
m32 mul_m32(m32 A, m32 B, m32 inv, u32 mod) {
    return _reduce_m32((u64)A * B, inv, mod);
}

/* A^n by binary exponentiation; returns the Montgomery 1 for n <= 0. */
m32 pow_m32(m32 A, i64 n, m32 inv, u32 mod) {
    m32 ret = _one_m32(mod);
    while (n > 0) {
        if (n & 1) ret = mul_m32(ret, A, inv, mod);
        A = mul_m32(A, A, inv, mod);
        n >>= 1;
    }
    return ret;
}

/* A^(mod - 2); this is the multiplicative inverse when mod is prime. */
m32 inv_m32(m32 A, m32 inv, u32 mod) {
    return pow_m32(A, (i64)mod - 2, inv, mod);
}

/* A * B^(mod - 2). */
m32 div_m32(m32 A, m32 B, m32 inv, u32 mod) {
    /* assert(is_prime(mod)); */
    return mul_m32(A, inv_m32(B, inv, mod), inv, mod);
}

/*
 * Reads an unsigned decimal token from stdin and returns it in Montgomery
 * form. Yields the representation of 0 if input ends before any digit.
 */
m32 in_m32(m32 r2, m32 inv, u32 mod) {
    /* c is int (not u32) so that EOF stays distinguishable. */
    int c;
    u32 a = 0;
    while (c = getchar_unlocked(), c < 48 || c > 57) {
        if (c == EOF) break;
    }
    while (47 < c && c < 58) {
        a = a * 10 + c - 48;
        c = getchar_unlocked();
    }
    return to_m32(a, r2, inv, mod);
}

/* Converts A back to a plain integer and writes it in decimal. */
void out_m32(m32 A, m32 inv, u32 mod) {
    u32 a = from_m32(A, inv, mod);
    out_u32(a);
}
#pragma endregion montgomery_32bit

#pragma region ntt998244353
const u32 mod = 998244353u;
const u32 mod2 = 1996488706u; /* 2 * mod */
const m32 r2 = 932051910u;    /* presumably _r2_m32(mod) -- TODO confirm */
const m32 inv = 3296722945u;  /* presumably _inv_m32(mod) -- TODO confirm */
const m32 one = 301989884u;   /* 2^32 mod 998244353, i.e. _one_m32(mod) */
const m32 rev = 696254469u;   /* not referenced anywhere in this file */

/*
 * Per-step twiddle multipliers for ntt() / intt(), indexed by CTZ32(~s).
 * Presumably precomputed root-of-unity ratios in Montgomery form -- the
 * generating code is not part of this file.
 */
const m32 gs[] = {
    691295370, 307583142, 566821959, 878217029, 375146819, 138254384,
    500602490, 79119218,  790898700, 978335284, 651424567, 308706579,
    723000027, 474797508, 683394121, 44141573,  536892010, 945865189,
    175417726, 536169764, 831722880, 721458245
};
const m32 igs[] = {
    306948983, 888603487, 138723248, 65668869,  842568658, 953245971,
    195169681, 118717521, 792052763, 828450244, 908724728, 218560432,
    628507989, 248210924, 566568154, 6285593,   82571768,  49985074,
    225413092, 349167278, 61514562,  763211248
};

/*
 * Allocates `count` zero-initialised m32 values (at least one, so the result
 * is always a valid pointer to pass to free()). Terminates the program if the
 * allocation fails instead of letting callers dereference NULL.
 */
static m32 *alloc_m32_array(size_t count) {
    m32 *p = (m32 *)calloc(count ? count : 1, sizeof(m32));
    if (p == NULL) exit(EXIT_FAILURE);
    return p;
}

/*
 * In-place forward transform. Works on 2^h elements where 2^h is the smallest
 * power of two >= A_len, so A must provide that many elements. The gs table
 * has 22 entries, which limits h to 22.
 */
void ntt(m32 *A, int A_len) {
    int h = 0;
    while (A_len > (1 << h)) h++;
    for (int ph = 1; ph <= h; ph++) {
        int w = 1 << (ph - 1); /* number of butterfly groups in this pass */
        int p = 1 << (h - ph); /* half-width of one group */
        m32 now = one;
        for (int s = 0; s < w; s++) {
            int offset = s << (h - ph + 1);
            for (int i = 0; i < p; i++) {
                m32 l = A[i + offset];
                m32 r = mul_m32(A[i + offset + p], now, inv, mod);
                A[i + offset] = add_m32(l, r, mod2);
                A[i + offset + p] = sub_m32(l, r, mod2);
            }
            now = mul_m32(now, gs[CTZ32(~s)], inv, mod);
        }
    }
}

/*
 * In-place inverse of ntt(): runs the passes in reverse order with the igs
 * table, then scales every element by the inverse of A_len.
 */
void intt(m32 *A, int A_len) {
    int h = 0;
    while (A_len > (1 << h)) h++;
    for (int ph = h; ph >= 1; ph--) {
        int w = 1 << (ph - 1);
        int p = 1 << (h - ph);
        m32 inow = one;
        for (int s = 0; s < w; s++) {
            int offset = s << (h - ph + 1);
            for (int i = 0; i < p; i++) {
                m32 l = A[i + offset];
                m32 r = A[i + offset + p];
                A[i + offset] = add_m32(l, r, mod2);
                A[i + offset + p] = mul_m32(sub_m32(l, r, mod2), inow, inv, mod);
            }
            inow = mul_m32(inow, igs[CTZ32(~s)], inv, mod);
        }
    }
    m32 inv2t = inv_m32(to_m32(A_len, r2, inv, mod), inv, mod);
    for (int i = 0; i < A_len; i++) A[i] = mul_m32(A[i], inv2t, inv, mod);
}

/*
 * Convolution of A (A_len values) and B (B_len values), all in Montgomery
 * form. Returns a newly allocated buffer whose first A_len + B_len - 1
 * entries hold the result; the caller owns it and must free() it.
 * If either length is not positive, a zeroed one-element buffer is returned.
 */
m32 *convolute(m32 *A, int A_len, m32 *B, int B_len) {
    if (A_len <= 0 || B_len <= 0) return alloc_m32_array(1);
    int ret_len = A_len + B_len - 1;
    int c_len = BIT_CEIL32(ret_len);
    m32 *c = alloc_m32_array((size_t)c_len);
    m32 *d = alloc_m32_array((size_t)c_len);
    memcpy(c, A, sizeof(m32) * A_len);
    memcpy(d, B, sizeof(m32) * B_len);
    ntt(c, c_len);
    ntt(d, c_len);
    for (int i = 0; i < c_len; i++) c[i] = mul_m32(c[i], d[i], inv, mod);
    free(d);
    intt(c, c_len);
    return c;
}
#pragma endregion ntt998244353

#pragma region fps_taylor_shift
enum {
    FACT_TABLE_LEN = 1 << 20, /* capacity of the factorial tables */
    MAX_COEFFS = 1 << 19      /* capacity of the input buffer in Main() */
};

m32 _fact[FACT_TABLE_LEN];      /* _fact[i]      = i!          */
m32 _inv_fact[FACT_TABLE_LEN];  /* _inv_fact[i]  = 1 / i!      */
m32 _inv_table[FACT_TABLE_LEN]; /* _inv_table[i] = 1 / i, i>=1 */

/*
 * Fills _fact[0..n+2], _inv_fact[0..n+2] and _inv_table[1..n+1].
 * Terminates the program if n is negative or n + 2 would not fit in the
 * tables.
 */
void pre_fact(int n) {
    if (n < 0 || n > FACT_TABLE_LEN - 3) exit(EXIT_FAILURE);
    _fact[0] = one;
    for (int i = 0; i <= n + 1; i++)
        _fact[i + 1] = mul_m32(_fact[i], to_m32(i + 1, r2, inv, mod), inv, mod);
    /* One modular inversion at the top, then walk downwards. */
    _inv_fact[n + 2] = inv_m32(_fact[n + 2], inv, mod);
    for (int i = n + 2; i > 0; i--)
        _inv_fact[i - 1] = mul_m32(_inv_fact[i], to_m32(i, r2, inv, mod), inv, mod);
    for (int i = 1; i <= n + 1; i++)
        _inv_table[i] = mul_m32(_inv_fact[i], _fact[i - 1], inv, mod);
}

/*
 * Taylor shift of the polynomial with coefficients A[0..A_len-1] by C
 * (everything in Montgomery form). Requires pre_fact(n) with n >= A_len - 1
 * to have been called. Returns a newly allocated buffer of A_len coefficients
 * that the caller must free(); for A_len <= 0 the buffer holds a single
 * zeroed element.
 */
m32 *fps_taylor_shift(m32 *A, int A_len, m32 C) {
    if (A_len <= 0) return alloc_m32_array(1);
    m32 *work0 = alloc_m32_array((size_t)A_len);
    m32 *ret = alloc_m32_array((size_t)A_len);
    /* ret = reversed sequence a_i * i! */
    for (int i = 0; i < A_len; i++)
        ret[A_len - 1 - i] = mul_m32(A[i], _fact[i], inv, mod);
    /* work0[i] = C^i / i!, built as work0[i-1] * C * (1/i!) * (i-1)! */
    work0[0] = one;
    for (int i = 1; i < A_len; i++)
        work0[i] = mul_m32(mul_m32(mul_m32(work0[i - 1], C, inv, mod), _inv_fact[i], inv, mod), _fact[i - 1], inv, mod);
    m32 *work1 = convolute(ret, A_len, work0, A_len);
    /* Undo the reversal and divide by i!. */
    for (int i = 0; i < A_len; i++)
        ret[i] = mul_m32(work1[A_len - 1 - i], _inv_fact[i], inv, mod);
    free(work0);
    free(work1);
    return ret;
}
#pragma endregion fps_taylor_shift

/*
 * Reads "N c" followed by N coefficients, prints the shifted coefficients
 * separated by single spaces and terminated by a newline. Terminates the
 * program if N is outside [0, MAX_COEFFS].
 */
void Main(void) {
    /* Static storage: 2 MiB is too large to put on the stack safely. */
    static m32 A[MAX_COEFFS];
    int n = read_int();
    if (n < 0 || n > MAX_COEFFS) exit(EXIT_FAILURE);
    pre_fact(n);
    m32 shift = in_m32(r2, inv, mod);
    for (int i = 0; i < n; i++) A[i] = in_m32(r2, inv, mod);
    m32 *shifted = fps_taylor_shift(A, n, shift);
    for (int i = 0; i < n; i++) {
        if (i) SP();
        out_m32(shifted[i], inv, mod);
    }
    NL();
    free(shifted);
}

int main(void) {
    Main();
    return 0;
}